Imports & Reproducibility¶
Ensure Box2D for LunarLander
!pip -q install swig
!pip -q install "gymnasium[box2d]"
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.9/1.9 MB 29.3 MB/s eta 0:00:00 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 374.4/374.4 kB 8.3 MB/s eta 0:00:00 Preparing metadata (setup.py) ... done Building wheel for box2d-py (setup.py) ... done
Imports
import os, sys, math, time, random, warnings, json, platform, traceback, queue, csv
from dataclasses import dataclass
from typing import Tuple, List, Dict, Any
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.multiprocessing as mp
from torch.distributions import Categorical
import gymnasium as gym
from gymnasium.wrappers import RecordVideo
import matplotlib.pyplot as plt
from collections import deque
Reproducibility
# Base seed shared by every RNG touched in this notebook.
SEED = 1227
# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so this
# presumably only influences processes launched afterwards — confirm if relied on.
os.environ["PYTHONHASHSEED"] = f"{SEED}"


def set_global_seeds(seed: int = SEED):
    """Seed Python's `random`, NumPy's global RNG and PyTorch with one value."""
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)


set_global_seeds(SEED)
# Worker processes are created with "fork"; force=True overrides any earlier choice.
mp.set_start_method("fork", force=True)
# Everything in this notebook runs on the CPU.
device = torch.device("cpu")
Artifacts directories
# Root folder for everything a run writes; videos and checkpoints get subfolders.
ART_DIR = "part2_artifacts"
VID_DIR = os.path.join(ART_DIR, "videos")
CKPT_DIR = os.path.join(ART_DIR, "checkpoints")
# Create the folders up front so later save calls never hit a missing directory.
for _artifact_dir in (ART_DIR, VID_DIR, CKPT_DIR):
    os.makedirs(_artifact_dir, exist_ok=True)
del _artifact_dir
1. Environment factory & seeding workers¶
Defines a clean setup for environments with deterministic yet distinct randomness. ENV_ID is set to LunarLander-v3. Each worker gets its own environment and a unique seed so parallel rollouts are reproducible without being identical. The seeding routine initializes the environment via a reset, seeds the action space when available, and also sets local randomness in that process. In addition to the worker env, we include a lightweight evaluation env for metrics and a dedicated video env configured for recording a single greedy episode. This keeps training, evaluation, and recording clearly separated and consistent.
# Gymnasium id of the environment used for training, evaluation and video.
ENV_ID = "LunarLander-v3"


def worker_seed(worker_id: int, base_seed: int) -> int:
    """
    Map a worker index to its own reproducible seed.

    Workers are spaced 10,000 apart starting from `base_seed`, so worker 0
    uses `base_seed` itself and distinct workers get distinct seeds.
    """
    offset = 10_000 * worker_id
    return int(base_seed + offset)
def set_env_seed(env: gym.Env, seed: int):
    """
    Reset `env` with `seed` and seed the process-local Python/NumPy RNGs.

    The action space is seeded as well when it exposes a `seed` method.
    PyTorch's RNG is not touched here.

    Returns the `(obs, info)` pair produced by the seeded reset.
    """
    obs, info = env.reset(seed=seed)
    action_space = env.action_space
    if hasattr(action_space, "seed"):
        action_space.seed(seed)
    # Process-wide RNGs: affects every later np.random / random call in this process.
    np.random.seed(seed)
    random.seed(seed)
    return obs, info
def make_env(env_id: str = ENV_ID, worker_id: int = 0, base_seed: int = 1227) -> gym.Env:
    """
    Create a training environment for one worker process.

    The environment is reset once with the worker-specific seed derived from
    `worker_seed(worker_id, base_seed)`; that first observation is discarded,
    so callers reset again before stepping.
    """
    env = gym.make(env_id)
    seed_for_worker = worker_seed(worker_id, base_seed)
    set_env_seed(env, seed_for_worker)
    return env
def make_eval_env(env_id: str = ENV_ID, base_seed: int = 1227) -> gym.Env:
    """
    Create an evaluation environment, seeded deterministically with `base_seed`.

    Unlike `make_env`, no per-worker offset is applied to the seed.
    """
    eval_env = gym.make(env_id)
    set_env_seed(eval_env, base_seed)
    return eval_env
def make_video_env(env_id: str = ENV_ID, video_root: str = "part2_artifacts/videos", run_id: str = "run", base_seed: int = 1227 ,name_prefix: str | None = None) -> gym.Env:
    """
    Create an `rgb_array` environment wrapped in `RecordVideo`.

    Videos go to `<video_root>/<run_id>/` (created if missing) and every
    episode is recorded. File names start with `name_prefix`, or with
    `run_id` when no prefix is given. The wrapped env is reset once with
    `base_seed` before being returned.
    """
    video_dir = os.path.join(video_root, run_id)
    os.makedirs(video_dir, exist_ok=True)

    if name_prefix is None:
        prefix = run_id
    else:
        prefix = name_prefix

    base_env = gym.make(env_id, render_mode="rgb_array")
    env = RecordVideo(
        base_env,
        video_folder=video_dir,
        name_prefix=prefix,
        episode_trigger=lambda ep: True,  # record every episode
    )
    # NOTE(review): this seeded reset happens on the recording wrapper itself —
    # confirm whether it counts as an episode for the recorder.
    set_env_seed(env, base_seed)
    return env
2. Actor–Critic network¶
Defines a compact actor–critic for LunarLander with a two-layer ReLU shared representation of the eight-dimensional state. From these shared features, the policy head outputs logits over the four actions and the value head predicts a single V(s). All layers use orthogonal initialization with gains chosen for stable learning, keeping the policy head conservative early on and the value head reliable for regression. A small model configuration and builder ensure training and evaluation recreate the exact same architecture for reproducible weight loading. The forward pass returns logits and value in batch form, a helper constructs the policy distribution when needed, and greedy_action runs in evaluation mode to pick the highest-logit action for deterministic tests.
@dataclass
class ModelConfig:
    """Architecture description used to (re)build an `ActorCritic`."""

    # Length of the observation vector (8 for LunarLander).
    obs_dim: int = 8
    # Number of discrete actions (LunarLander uses Discrete(4)).
    n_actions: int = 4
    # Widths of the shared hidden layers, in order.
    hidden_sizes: tuple = (256, 256)
class ActorCritic(nn.Module):
    """
    Shared-torso actor–critic network.

    A stack of `Linear -> ReLU` layers (one per entry of `hidden_sizes`)
    produces features that feed two linear heads: `policy_head` (action
    logits) and `value_head` (scalar state value).

    Args:
        obs_dim: size of the observation vector.
        n_actions: number of discrete actions.
        hidden_sizes: widths of the shared hidden layers. Any length is
            accepted; with two entries the parameter names match the
            original two-layer layout (`torso.0`, `torso.2`).
    """

    def __init__(self, obs_dim: int, n_actions: int, hidden_sizes=(256, 256)):
        super().__init__()
        widths = [int(obs_dim), *(int(h) for h in hidden_sizes)]
        layers = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.ReLU())
        self.torso = nn.Sequential(*layers)
        feat_dim = widths[-1]
        self.policy_head = nn.Linear(feat_dim, n_actions)  # logits
        self.value_head = nn.Linear(feat_dim, 1)  # V(s)

        # Orthogonal initialization: ReLU gain for the torso, a tiny gain for
        # the policy head (near-uniform initial policy), unit gain for the value head.
        relu_gain = nn.init.calculate_gain("relu")
        for layer in self.torso:
            if isinstance(layer, nn.Linear):
                nn.init.orthogonal_(layer.weight, gain=relu_gain)
                nn.init.zeros_(layer.bias)
        nn.init.orthogonal_(self.policy_head.weight, gain=0.01)
        nn.init.zeros_(self.policy_head.bias)
        nn.init.orthogonal_(self.value_head.weight, gain=1.0)
        nn.init.zeros_(self.value_head.bias)

    def forward(self, x: torch.Tensor):
        """Return `(logits, value)`; the value's trailing size-1 axis is squeezed away."""
        x = x.float()
        z = self.torso(x)
        logits = self.policy_head(z)
        value = self.value_head(z).squeeze(-1)
        return logits, value

    @staticmethod
    def dist_from_logits(logits: torch.Tensor) -> Categorical:
        """Wrap action logits in a `Categorical` distribution."""
        return Categorical(logits=logits)

    @torch.no_grad()
    def greedy_action(self, obs_np: np.ndarray) -> int:
        """
        Return the index of the highest-logit action for a single observation.

        Switches the module to eval mode (and leaves it there).
        """
        self.eval()
        # Build the input on the same device as the weights so the call also
        # works when the model has been moved off the CPU.
        param_device = next(self.parameters()).device
        x = torch.tensor(obs_np, dtype=torch.float32, device=param_device).unsqueeze(0)
        logits, _ = self(x)
        return int(torch.argmax(logits, dim=1).item())
def build_model_from_config(cfg: ModelConfig) -> ActorCritic:
    """Instantiate an `ActorCritic` whose architecture is described by `cfg`."""
    network = ActorCritic(
        cfg.obs_dim,
        cfg.n_actions,
        cfg.hidden_sizes,
    )
    return network
# Notebook-wide LunarLander model: 8-dim observations, 4 discrete actions.
model_cfg = ModelConfig(obs_dim=8, n_actions=4, hidden_sizes=(256, 256))
ac_model = build_model_from_config(model_cfg)
ac_model = ac_model.to(device)
print(
    f"[Model] ActorCritic on {device} | obs_dim={model_cfg.obs_dim} "
    f"n_actions={model_cfg.n_actions} hidden={model_cfg.hidden_sizes}"
)
[Model] ActorCritic on cpu | obs_dim=8 n_actions=4 hidden=(256, 256)
3. Rollout buffer & n-step targets¶
Defines a per-worker, on-policy buffer that records (s, a, r, terminated, truncated, value, logπ) and converts them into the training signals for A2C. We compute bootstrapped returns for stability on partial rollouts and advantages for a lower-variance policy gradient. If a transition ends the episode by a true terminal event, the return does not bootstrap past that point; if the rollout stops due to a time limit (or simply because we reached the rollout horizon), we do bootstrap from the critic’s prediction at the final state (last_value). The buffer also provides a helper that emits device-ready tensors so the training step can use them directly.
$$
G_t = r_t + \gamma (1-\mathrm{terminated}_t)G_{t+1}
$$
If the rollout ends by time limit or at the rollout horizon (not terminal), the recursion is seeded with $G_T = \text{last\_value}$; if it ends on a terminal transition, $G_T = 0$.
$$ A_t = G_t - V(s_t) $$
@dataclass
class StepRecord:
    """One environment transition plus the policy/critic outputs recorded for it."""

    # Observation the action was chosen from (time t).
    s: np.ndarray
    # Index of the action taken at time t.
    a: int
    # Reward returned by the step.
    r: float
    # True when the step ended the episode through a terminal condition.
    terminated: bool
    # True when the step ended the episode through a time limit / external cut.
    truncated: bool
    # Critic prediction V(s_t) made when the action was chosen.
    value: float
    # log pi(a_t | s_t) at the time of acting (kept for diagnostics).
    logpi: float
class RolloutBuffer:
    """
    On-policy storage for one rollout segment.

    Keeps parallel Python lists (one entry per step) for states, actions,
    rewards, terminated/truncated flags, critic values and log-probabilities,
    and turns them into bootstrapped returns, advantages and tensors.
    """

    def __init__(self):
        self.s, self.a, self.r = [], [], []
        self.terminated, self.truncated = [], []
        self.values, self.logpi = [], []

    def add(self, rec: StepRecord):
        """Append every field of `rec` to the matching list."""
        self.s.append(rec.s)
        self.a.append(rec.a)
        self.r.append(rec.r)
        self.terminated.append(rec.terminated)
        self.truncated.append(rec.truncated)
        self.values.append(rec.value)
        self.logpi.append(rec.logpi)

    def __len__(self):
        """Number of steps collected so far."""
        return len(self.r)

    def clear(self):
        """Empty all lists in place."""
        self.s.clear(); self.a.clear(); self.r.clear()
        self.terminated.clear(); self.truncated.clear()
        self.values.clear(); self.logpi.clear()

    def build_targets(self, last_value: float, gamma: float = 0.99):
        """
        Compute bootstrapped returns G_t and advantages A_t = G_t - V(s_t).

        Args:
            last_value: bootstrap for the step after the final stored one;
                V(s_T) when the segment did not end on a terminal transition,
                else 0. It is ignored when the last step is terminated.
            gamma: discount factor.

        Returns:
            `(returns, advantages)` as float32 NumPy arrays of length `len(self)`.

        The recursion is restarted at every episode boundary inside the
        segment so that rewards of a later episode never flow into an
        earlier one:
          * terminated step: nothing follows, the tail is 0;
          * truncated step that is not the last one: the state after the
            cut is not stored, so the recorded V(s_t) stands in for
            V(s_{t+1}) as the bootstrap (an approximation).
        """
        T = len(self.r)
        returns = np.zeros(T, dtype=np.float32)
        values = np.asarray(self.values, dtype=np.float32)
        tail = float(last_value)
        for t in reversed(range(T)):
            if self.terminated[t]:
                tail = 0.0
            elif self.truncated[t] and t < T - 1:
                tail = float(values[t])
            tail = float(self.r[t]) + gamma * tail
            returns[t] = tail
        advantages = returns - values
        return returns, advantages

    def as_tensors(self, returns, advantages, device=device):
        """
        Convert the stored lists plus `returns`/`advantages` to tensors on `device`.

        Returns the tuple `(s, a, r, terminated, truncated, values, logpi, G, A)`;
        actions are int64, everything else float32.
        """
        s = torch.tensor(np.array(self.s, dtype=np.float32), device=device)
        a = torch.tensor(np.array(self.a, dtype=np.int64), device=device)
        r = torch.tensor(np.array(self.r, dtype=np.float32), device=device)
        ter = torch.tensor(np.array(self.terminated, dtype=np.float32), device=device)
        tru = torch.tensor(np.array(self.truncated, dtype=np.float32), device=device)
        v = torch.tensor(np.array(self.values, dtype=np.float32), device=device)
        lp = torch.tensor(np.array(self.logpi, dtype=np.float32), device=device)
        G = torch.tensor(np.array(returns, dtype=np.float32), device=device)
        A = torch.tensor(np.array(advantages, dtype=np.float32), device=device)
        return s, a, r, ter, tru, v, lp, G, A
4. Worker rollout (T-step on-policy data)¶
Collects a short on-policy rollout from one environment. At each step, it samples an action from the policy, records the policy log-prob and the critic value, and appends (s, a, r, terminated, truncated, value, logπ) to the per-worker buffer. The loop runs for up to T steps or stops early if the episode ends. If the episode ends by a true terminal event, the bootstrap target is zero; if it ends by time limit or simply reaches the T-step cutoff, we bootstrap from the critic at the final state. The function returns the filled buffer, the bootstrap value, the next observation to continue from, and basic episode stats for logging.
def _to_tensor(obs_np: np.ndarray) -> torch.Tensor:
    """Copy one observation into a float32 tensor on `device` with a leading batch axis."""
    single = torch.tensor(obs_np, dtype=torch.float32, device=device)
    return single.unsqueeze(0)
def rollout_T_steps(
    env: gym.Env,
    model: nn.Module,
    obs: np.ndarray,
    T: int = 20
) -> Tuple[RolloutBuffer, float, np.ndarray, bool, float, int]:
    """
    Collect up to `T` on-policy steps from `env`, stopping early at episode end.

    Args:
        env: environment to step; it is NOT reset here, even when the episode ends.
        model: callable returning `(logits, value)` for a batched observation.
        obs: observation to start acting from.
        T: maximum number of steps to collect.

    Returns:
        `(buffer, last_value, obs, done, ep_return, ep_len)` where `buffer`
        holds the collected steps, `last_value` is the bootstrap value (0.0
        after a terminal transition, otherwise the critic's value of the
        final observation), `obs` is the observation after the last step,
        `done` tells whether the episode ended, and `ep_return`/`ep_len`
        are the reward sum and step count of this segment.
    """
    buffer = RolloutBuffer()
    ep_return, ep_len = 0.0, 0
    done = False
    last_terminated = False
    for _ in range(T):
        # Acting only needs numbers, not gradients: a2c_update recomputes the
        # forward pass, so skip building an autograd graph here.
        with torch.no_grad():
            logits, value = model(_to_tensor(obs))
            dist = Categorical(logits=logits)
            a = dist.sample()
            logpi = float(dist.log_prob(a).item())
        v = float(value.squeeze(0).item())
        a_int = int(a.item())
        next_obs, r, terminated, truncated, _ = env.step(a_int)
        done = bool(terminated or truncated)
        buffer.add(
            StepRecord(
                s=obs, a=a_int, r=float(r),
                terminated=bool(terminated), truncated=bool(truncated),
                value=v, logpi=logpi
            )
        )
        ep_return += float(r)
        ep_len += 1
        obs = next_obs
        last_terminated = bool(terminated)
        if done:
            break
    # Bootstrap value:
    #   true terminal transition            -> 0.0 (nothing follows)
    #   time-limit truncation or T-step cut -> V(s_T) from the critic
    if done and last_terminated:
        last_value = 0.0
    else:
        with torch.no_grad():
            _, vT = model(_to_tensor(obs))
        last_value = float(vT.squeeze(0).item())
    return buffer, last_value, obs, done, ep_return, ep_len
5. A2C loss and single-batch update¶
Turns one rollout into a single A2C update. We recompute policy logits and values on the collected states, form the policy loss from the advantage signal (normalized to reduce variance), the value loss as MSE to the bootstrapped returns, and add an entropy bonus to sustain exploration. We then backpropagate the combined objective, apply gradient clipping, and step the optimizer. Advantages are treated as a fixed baseline for the policy term (detached) so the critic does not leak gradients into the actor. No artificial clamping of logits or returns is used; stability is handled by normalization and grad-norm clipping. The function is self-contained so it can be called per worker (A3C) or after aggregating rollouts (A2C).
$$ L_{\text{policy}} = -\,\mathbb{E}_t\!\left[\log \pi_\theta(a_t \mid s_t)\,\operatorname{stopgrad}(A_t)\right] $$
$$ L_{\text{value}} = \mathbb{E}_t\!\left[\big(G_t - V_\theta(s_t)\big)^2\right] $$
$$ \mathcal{L} = L_{\text{policy}} + c_v\,L_{\text{value}} - c_e\,\mathbb{E}_t\!\left[H\!\big(\pi_\theta(\cdot \mid s_t)\big)\right] $$
from torch.nn.utils import clip_grad_norm_
@torch.no_grad()
def _normalize_advantages(A: torch.Tensor, eps: float = 1e-8) -> torch.Tensor:
    """
    Return `A` shifted to zero mean and divided by its population std.

    `eps` is added to the std so a constant input yields zeros instead of
    a division by zero. Runs without gradient tracking.
    """
    centered = A - A.mean()
    scale = centered.std(unbiased=False) + eps
    return centered / scale
def a2c_update(
    model: nn.Module,
    optimizer: torch.optim.Optimizer,
    s: torch.Tensor,  # (T, obs_dim)
    a: torch.Tensor,  # (T,)
    G: torch.Tensor,  # (T,) bootstrapped returns
    A: torch.Tensor,  # (T,) raw advantages (G - V)
    *,
    normalize_adv: bool = True,
    entropy_coef: float = 0.01,
    value_coef: float = 0.5,
    max_grad_norm: float = 0.5
) -> Dict[str, float]:
    """
    Run one gradient step of A2C on a batch of transitions.

    The loss is `policy_loss + value_coef * value_loss - entropy_coef * entropy`,
    where the policy loss weights log-probabilities by the (optionally
    normalized, always detached) advantages and the value loss is the MSE
    between the critic output and `G`. Gradients are clipped to
    `max_grad_norm` before the optimizer step.

    Returns a dict of Python floats: `loss`, `policy_loss`, `value_loss`,
    `entropy` and `grad_norm` (the norm reported by the clipping call).
    """
    model.train()

    # Re-evaluate the batch under the current parameters.
    logits, values = model(s)
    policy = Categorical(logits=logits)
    log_probs = policy.log_prob(a)
    mean_entropy = policy.entropy().mean()

    weights = _normalize_advantages(A) if normalize_adv else A

    # Advantages act as constants: no gradient flows into the critic from here.
    policy_loss = -(log_probs * weights.detach()).mean()
    # Returns are used at their true scale (no clamping).
    value_loss = F.mse_loss(values, G)
    loss = policy_loss + value_coef * value_loss - entropy_coef * mean_entropy

    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    total_norm = clip_grad_norm_(model.parameters(), max_grad_norm)
    optimizer.step()

    stats: Dict[str, float] = {}
    stats["loss"] = float(loss.item())
    stats["policy_loss"] = float(policy_loss.item())
    stats["value_loss"] = float(value_loss.item())
    stats["entropy"] = float(mean_entropy.item())
    stats["grad_norm"] = float(total_norm)
    return stats
6. Multi-worker A2C¶
Defines the long-running worker used for multi-process A2C. Each worker creates its own LunarLander environment with a unique deterministic seed and holds a local CPU copy of the actor–critic. The main process sends a “rollout” command with the latest weights and a horizon T. The worker loads those weights, collects exactly T on-policy steps (resetting and continuing if an episode finishes), and records per-step fields state, action, reward, terminal/time-limit flags, critic value, and policy log-probability. For bootstrapping, if the last transition was terminal the return does not bootstrap, otherwise it bootstraps from the critic at the final state. The worker returns these arrays along with any episodic returns/lengths observed in the segment. A “close” command cleanly shuts the worker down.
def worker_proc(
    worker_id: int,
    cmd_queue,   # mp.Queue
    res_queue,   # mp.Queue (for rollout payloads)
    err_queue,   # mp.Queue (for exception tracebacks)
    env_id: str,
    base_seed: int,
    model_cfg_dict: Dict[str, Any],  # plain dict: {"obs_dim":..., "n_actions":..., "hidden_sizes":(...) }
):
    """
    Long-running rollout worker for synchronous multi-process A2C.

    The worker owns one environment and a local CPU `ActorCritic`. It blocks
    on `cmd_queue` and reacts to dict commands:
      * `{"type": "rollout", "state_dict": ..., "T": ...}` — load the weights,
        collect exactly `T` steps (resetting and continuing when an episode
        ends) and put one payload dict on `res_queue`;
      * `{"type": "close"}` or `None` — leave the loop;
      * anything else is ignored.

    Any exception is reported on `err_queue` as
    `{"worker_id", "traceback"}`; the environment is closed on exit.
    """
    env = None  # so the cleanup below is safe even if creation fails
    try:
        env = make_env(env_id=env_id, worker_id=worker_id, base_seed=base_seed)
        # Forked workers start with a copy of the parent's torch RNG state, so
        # without this every worker would draw the same action-sampling noise.
        torch.manual_seed(worker_seed(worker_id, base_seed))
        obs, _ = env.reset()
        # Local CPU model
        local_device = torch.device("cpu")
        model = ActorCritic(
            model_cfg_dict["obs_dim"],
            model_cfg_dict["n_actions"],
            tuple(model_cfg_dict["hidden_sizes"])
        ).to(local_device)
        model.eval()
        while True:
            cmd = cmd_queue.get()
            if cmd is None or cmd.get("type") == "close":
                break
            if cmd.get("type") != "rollout":
                continue
            # Load latest CPU weights
            model.load_state_dict(cmd["state_dict"])
            model.eval()
            T = int(cmd.get("T", 20))
            buf = RolloutBuffer()
            seg_ep_returns: List[float] = []
            seg_ep_lengths: List[int] = []
            cur_ep_return, cur_ep_len = 0.0, 0
            last_terminated = False
            # State the critic must evaluate for the bootstrap. It is tracked
            # separately from `obs` because `obs` is replaced by the first
            # observation of a NEW episode whenever the env is reset.
            bootstrap_obs = obs
            # Collect exactly T steps (might span multiple episodes)
            for _ in range(T):
                x = torch.tensor(obs, dtype=torch.float32, device=local_device).unsqueeze(0)
                with torch.no_grad():
                    logits, value = model(x)
                    dist = Categorical(logits=logits)
                    a = dist.sample()
                    logpi = float(dist.log_prob(a).item())
                v = float(value.squeeze(0).item())
                a_int = int(a.item())
                next_obs, r, terminated, truncated, _ = env.step(a_int)
                buf.add(
                    StepRecord(
                        s=obs,
                        a=a_int,
                        r=float(r),
                        terminated=bool(terminated),
                        truncated=bool(truncated),
                        value=v,
                        logpi=logpi,
                    )
                )
                cur_ep_return += float(r)
                cur_ep_len += 1
                last_terminated = bool(terminated)
                bootstrap_obs = next_obs
                if terminated or truncated:
                    seg_ep_returns.append(cur_ep_return)
                    seg_ep_lengths.append(cur_ep_len)
                    cur_ep_return, cur_ep_len = 0.0, 0
                    obs, _ = env.reset()
                else:
                    obs = next_obs
            # Bootstrap for the last transition of this segment: zero after a
            # true terminal; otherwise the critic's value of the state that
            # transition led to (also when it was cut by the time limit).
            if last_terminated:
                last_value = 0.0
            else:
                with torch.no_grad():
                    xT = torch.tensor(bootstrap_obs, dtype=torch.float32, device=local_device).unsqueeze(0)
                    _, vT = model(xT)
                last_value = float(vT.squeeze(0).item())
            res_queue.put({
                "worker_id": worker_id,
                "s": buf.s,
                "a": buf.a,
                "r": buf.r,
                "terminated": buf.terminated,
                "truncated": buf.truncated,
                "values": buf.values,
                "logpi": buf.logpi,
                "last_value": last_value,
                "ep_returns": seg_ep_returns,
                "ep_lengths": seg_ep_lengths,
                "T": T,
            })
    except Exception:
        # Surface the traceback to the main process
        err_queue.put({
            "worker_id": worker_id,
            "traceback": traceback.format_exc()
        })
    finally:
        # Best-effort cleanup: a failing close must not mask the real error.
        if env is not None:
            try:
                env.close()
            except Exception:
                pass
7. Checkpoint utilities¶
This cell adds portable, reproducible checkpoints. Instead of saving only weights, we save a single file that includes the model’s exact architecture, the training hyperparameters, software versions, and optional random-number generator states. The save helper writes this rich payload so future runs can rebuild the same network and evaluation setup without guessing settings, and the load helper restores either the rich format or a legacy plain state dict. Together, these utilities let us reload a checkpoint later and reproduce the 10-episode greedy evaluation and the one-episode video on the same software stack.
def _software_versions():
    """Collect interpreter, platform and library versions plus `device` and `ENV_ID`."""
    python_version = sys.version.split()[0]  # e.g. "3.11.4" without build info
    return dict(
        python=python_version,
        platform=platform.platform(),
        torch=torch.__version__,
        gymnasium=gym.__version__,
        numpy=np.__version__,
        device=str(device),
        env_id=ENV_ID,
    )
def _rng_capture():
    """
    Snapshot the Python, NumPy and PyTorch (CPU) random-number generator states.

    The torch state is stored as a plain list of ints rather than a tensor.
    """
    snapshot = {}
    snapshot["python_random"] = random.getstate()
    snapshot["numpy_random"] = np.random.get_state()
    snapshot["torch_cpu"] = torch.get_rng_state().tolist()
    return snapshot
def _rng_restore(rng_state: dict):
    """
    Restore the RNG states saved by `_rng_capture`.

    States are applied in order (Python, NumPy, torch). A failure is printed
    and swallowed, so generators restored before the failure stay restored.
    """
    restorers = (
        ("python_random", random.setstate),
        ("numpy_random", np.random.set_state),
        # The torch state was saved as a list of ints; rebuild the uint8 tensor.
        ("torch_cpu", lambda raw: torch.set_rng_state(torch.tensor(raw, dtype=torch.uint8))),
    )
    try:
        for key, restore in restorers:
            restore(rng_state[key])
    except Exception as e:
        print(f"[Checkpoint] RNG restore failed (non-fatal): {e}")
def save_checkpoint(
    path: str,
    model: nn.Module,
    *,
    model_cfg: ModelConfig,
    train_config: dict,
    steps_total: int,
    include_rng: bool = True,
    notes: str = ""
):
    """
    Write a self-describing checkpoint to `path` with `torch.save`.

    Besides the CPU copy of the weights, the file records the architecture
    (`model_cfg`), a copy of `train_config`, software versions, the step
    count, the global `SEED`, free-form `notes` and — when `include_rng`
    is true — the current RNG states.
    """
    cpu_weights = {name: tensor.cpu() for name, tensor in model.state_dict().items()}
    architecture = {
        "obs_dim": model_cfg.obs_dim,
        "n_actions": model_cfg.n_actions,
        "hidden_sizes": tuple(model_cfg.hidden_sizes),
    }
    payload = {
        "format": "a2c_self_describing_v1",
        "created_at": time.strftime("%Y-%m-%d %H:%M:%S"),
        "state_dict": cpu_weights,
        "model_cfg": architecture,
        "train_config": dict(train_config),
        "software": _software_versions(),
        "steps_total": int(steps_total),
        "seed": int(SEED),
        "notes": str(notes),
    }
    if include_rng:
        payload["rng_state"] = _rng_capture()
    torch.save(payload, path)
    print(f"[Checkpoint] Saved self-describing checkpoint → {path}")
def load_checkpoint(path: str):
    """
    Load a checkpoint written by `save_checkpoint` or a legacy plain state dict.

    Returns a dict with keys `payload` (the full saved dict, or None for a
    legacy file), `state_dict` and `model_cfg`. For legacy files the
    notebook's current global `model_cfg` is used as the architecture.

    NOTE: loading uses `weights_only=False`, i.e. full unpickling — only
    load checkpoint files you trust.
    """
    try:
        obj = torch.load(path, map_location="cpu", weights_only=False)
    except TypeError:
        # Older torch versions do not accept the `weights_only` keyword.
        obj = torch.load(path, map_location="cpu")

    is_self_describing = (
        isinstance(obj, dict)
        and "state_dict" in obj
        and obj.get("format", "").startswith("a2c_self_describing_")
    )
    if not is_self_describing:
        print("[Checkpoint] Loaded legacy state_dict; using current notebook model_cfg.")
        current_cfg = {
            "obs_dim": model_cfg.obs_dim,
            "n_actions": model_cfg.n_actions,
            "hidden_sizes": tuple(model_cfg.hidden_sizes)
        }
        return {"payload": None, "state_dict": obj, "model_cfg": current_cfg}

    default_cfg = {"obs_dim": 8, "n_actions": 4, "hidden_sizes": (256, 256)}
    return {
        "payload": obj,
        "state_dict": obj["state_dict"],
        "model_cfg": obj.get("model_cfg", default_cfg),
    }
8. Multi-worker A2C - main trainer (synchronous)¶
Launches multiple workers and coordinates synchronous A2C updates. In each iteration, the main process broadcasts the latest CPU weights and a rollout horizon T. Each worker collects exactly T on-policy steps (resetting and continuing if an episode ends) and returns per-step arrays with terminal/time-limit flags plus a bootstrap value. The trainer builds bootstrapped returns and advantages from each segment, concatenates them into one batch, performs a single A2C update (policy + value + entropy with gradient clipping), logs compact statistics, and repeats until the target number of environment steps is reached. At the end, it saves the weights and cleanly shuts down the workers.
def train_a2c_multi(
    n_workers: int = 2,
    total_env_steps: int = 200_000,
    T: int = 20,
    gamma: float = 0.99,
    entropy_coef: float = 0.02,
    value_coef: float = 0.25,
    max_grad_norm: float = 0.5,
    lr: float = 7e-4,
    log_every: int = 10_000,
    save_path: str = os.path.join(CKPT_DIR, "a2c_lander_multi.pth"),
):
    """
    Train an actor–critic with synchronous multi-process A2C.

    Each iteration broadcasts the current weights to `n_workers` worker
    processes, waits for one rollout payload per worker, builds returns and
    advantages per payload, concatenates them and performs a single
    `a2c_update`. The loop ends once `total_env_steps` environment steps
    have been collected; a checkpoint is then written to `save_path`.

    Args:
        n_workers: number of worker processes to start.
        total_env_steps: stop once at least this many steps were collected.
        T: rollout length requested from every worker per iteration.
        gamma: discount factor for the return computation.
        entropy_coef, value_coef, max_grad_norm: forwarded to `a2c_update`.
        lr: RMSprop learning rate.
        log_every: minimum number of steps between progress lines.
        save_path: checkpoint destination.

    Returns:
        `(model, logs)` where `logs` holds the rolling (last 500) and full
        episode-return histories, per-worker returns and `steps_total`.

    Raises:
        RuntimeError: a worker reported a crash on the error queue.
        TimeoutError: no payload arrived within the per-payload timeout.
    """
    # Authoritative CPU model
    model = build_model_from_config(model_cfg).to(torch.device("cpu"))
    optimizer = torch.optim.RMSprop(model.parameters(), lr=lr, alpha=0.99, eps=1e-5)
    start_method = "fork"
    ctx = mp.get_context(start_method)
    # One command queue per worker; results and errors share one queue each.
    cmd_queues = [ctx.Queue() for _ in range(n_workers)]
    res_queue = ctx.Queue()
    err_queue = ctx.Queue()
    procs = []
    # Pass a plain dict cfg to avoid pickling issues
    model_cfg_dict = {
        "obs_dim": model_cfg.obs_dim,
        "n_actions": model_cfg.n_actions,
        "hidden_sizes": tuple(model_cfg.hidden_sizes),
    }
    for wid in range(n_workers):
        p = ctx.Process(
            target=worker_proc,
            args=(wid, cmd_queues[wid], res_queue, err_queue, ENV_ID, SEED, model_cfg_dict),
            daemon=True,
        )
        p.start()
        procs.append(p)
    steps_total = 0
    iteration = 0
    wall_start = time.time()
    # Global histories
    ep_returns_deque = deque(maxlen=500)  # rolling window
    ep_returns_all: list[float] = []  # full global history
    # per-worker episode histories (for the plots)
    worker_returns: dict[int, list[float]] = {wid: [] for wid in range(n_workers)}
    last_log_steps = 0
    # Per-worker episode counters for lightweight progress prints
    worker_ep_seen = {wid: 0 for wid in range(n_workers)}
    # Hyperparameters recorded inside the checkpoint.
    train_config = {
        "n_workers": n_workers, "total_env_steps": total_env_steps, "T": T,
        "gamma": gamma, "entropy_coef": entropy_coef, "value_coef": value_coef,
        "max_grad_norm": max_grad_norm, "lr": lr, "log_every": log_every,
        "start_method": start_method,
    }
    print(f"[A2C][sync] start: workers={n_workers}, T={T}, target_steps={total_env_steps}, mp={start_method}", flush=True)
    try:
        while steps_total < total_env_steps:
            iteration += 1
            # Broadcast latest CPU weights
            cpu_state = {k: v.cpu() for k, v in model.state_dict().items()}
            for q in cmd_queues:
                q.put({"type": "rollout", "state_dict": cpu_state, "T": T})
            # Collect rollouts; surface worker errors if any
            pkgs = []
            batch_steps = 0
            # Maximum wait for EACH payload (the timer restarts per payload).
            timeout_s = max(60.0, 5.0 * n_workers)
            for _ in range(n_workers):
                got_pkg = False
                t0 = time.time()
                while not got_pkg:
                    try:
                        # Short poll so the error queue and the timeout are checked every second.
                        pkg = res_queue.get(timeout=1.0)
                        pkgs.append(pkg)
                        # Basic accounting
                        seg_steps = len(pkg["r"])
                        batch_steps += seg_steps
                        steps_total += seg_steps
                        # Episode returns (rolling + full history + per-worker)
                        if pkg.get("ep_returns"):
                            ep_returns_deque.extend(pkg["ep_returns"])
                            ep_returns_all.extend(pkg["ep_returns"])
                            wid = pkg.get("worker_id", None)
                            if wid is not None and wid in worker_returns:
                                # keep per-worker episode returns
                                worker_returns[wid].extend([float(x) for x in pkg["ep_returns"]])
                                # Progress print (every ~10 episodes per worker):
                                # fires when the counter crosses a multiple of 10.
                                worker_ep_seen[wid] += len(pkg["ep_returns"])
                                if (worker_ep_seen[wid] // 10) != ((worker_ep_seen[wid] - len(pkg["ep_returns"])) // 10):
                                    last_ret = float(pkg["ep_returns"][-1])
                                    print(
                                        f"[worker {wid}] episodes_seen={worker_ep_seen[wid]} "
                                        f"last_return={last_ret:.1f} (+{len(pkg['ep_returns'])} eps)",
                                        flush=True
                                    )
                        got_pkg = True
                    except queue.Empty:
                        # Check if any worker reported an error
                        if not err_queue.empty():
                            err = err_queue.get()
                            raise RuntimeError(
                                f"[Worker {err.get('worker_id')}] crashed:\n{err.get('traceback')}"
                            )
                        if time.time() - t0 > timeout_s:
                            raise TimeoutError("Timed out waiting for worker rollouts (no payload received).")
            # Build batch: targets are computed per payload (each has its own
            # bootstrap value) and only then concatenated.
            S_list, Aidx_list, G_list, Adv_list = [], [], [], []
            for pkg in pkgs:
                buf = RolloutBuffer()
                for i in range(len(pkg["r"])):
                    buf.add(
                        StepRecord(
                            s=pkg["s"][i],
                            a=int(pkg["a"][i]),
                            r=float(pkg["r"][i]),
                            terminated=bool(pkg["terminated"][i]),
                            truncated=bool(pkg["truncated"][i]),
                            value=float(pkg["values"][i]),
                            logpi=float(pkg["logpi"][i]),
                        )
                    )
                returns, adv = buf.build_targets(last_value=float(pkg["last_value"]), gamma=gamma)
                s, a, _, _, _, _, _, G, A = buf.as_tensors(returns, adv, device=torch.device("cpu"))
                S_list.append(s); Aidx_list.append(a); G_list.append(G); Adv_list.append(A)
            s_all = torch.cat(S_list, dim=0)
            a_all = torch.cat(Aidx_list, dim=0)
            G_all = torch.cat(G_list, dim=0)
            A_all = torch.cat(Adv_list, dim=0)
            # Update
            stats = a2c_update(
                model, optimizer, s_all, a_all, G_all, A_all,
                normalize_adv=True,
                entropy_coef=entropy_coef,
                value_coef=value_coef,
                max_grad_norm=max_grad_norm,
            )
            # Logging: first iteration, every `log_every` steps, and the final iteration.
            if iteration == 1 or (steps_total - last_log_steps) >= log_every or steps_total >= total_env_steps:
                avg10 = float(np.mean(list(ep_returns_deque)[-10:])) if ep_returns_deque else float("nan")
                print(
                    f"[A2C][sync] it={iteration:>5} steps={steps_total:>8} (+{batch_steps:>3}) "
                    f"avg10={avg10:7.2f} loss={stats['loss']:.3f} pg={stats['policy_loss']:.3f} "
                    f"vf={stats['value_loss']:.3f} H={stats['entropy']:.3f} gn={stats['grad_norm']:.3f}",
                    flush=True
                )
                last_log_steps = steps_total
        # Save rich checkpoint
        save_checkpoint(
            save_path, model,
            model_cfg=model_cfg,
            train_config=train_config,
            steps_total=steps_total,
            include_rng=True,
            notes="A2C LunarLander-v3 multi-worker (CPU)."
        )
        wall_time = time.time() - wall_start
        avg10 = float(np.mean(list(ep_returns_deque)[-10:])) if ep_returns_deque else float("nan")
        print(f"[A2C][sync] done: steps={steps_total} time={wall_time:.1f}s avg10={avg10:.2f}", flush=True)
    finally:
        # Always ask the workers to stop, also when training failed.
        for q in cmd_queues:
            q.put({"type": "close"})
        # Wait up to 5 s per worker; processes are daemonic (see Process(...) above).
        for p in procs:
            p.join(timeout=5)
    # Return histories for plotting
    return model, {
        "global_returns": list(ep_returns_deque),
        "global_returns_last500": list(ep_returns_deque),
        "global_returns_all": ep_returns_all,
        "worker_returns": worker_returns,
        "steps_total": steps_total,
    }
9. Training run & learning-curve plot¶
Runs the multi-worker A2C trainer end-to-end and saves the artifacts for this run. It launches several workers, aggregates fixed-length rollouts, performs one A2C update per aggregation, and repeats until the target number of environment steps is reached. As training progresses, it records completed-episode returns and produces a learning-curve figure that shows raw episode returns with a moving-average overlay. All artifacts are saved under the artifacts folder with the run_id in their filenames, so multiple experiments don’t overwrite each other.
def _moving_average(x, w: int):
    """
    Trailing moving average of `x` with window `w` (at least 1).

    The first `w - 1` outputs average over the samples seen so far, so the
    result has the same length as `x`. An empty input returns `[]`.
    """
    if not x:
        return []
    window = max(1, int(w))
    averaged = []
    running = 0.0
    for count, sample in enumerate(x, start=1):
        running += float(sample)
        if count > window:
            # Drop the sample that just slid out of the window.
            running -= float(x[count - 1 - window])
        averaged.append(running / min(count, window))
    return averaged
@dataclass
class TrainRunPaths:
    """File locations produced by one training run."""

    # Checkpoint file path.
    ckpt_path: str
    # Path of the saved training-curve image.
    train_plot_path: str
def _normalize_worker_returns(logs: Dict[str, Any]) -> List[List[float]] | None:
    """
    Extract per-worker episode returns from `logs` as a list of float lists.

    Reads `logs["worker_returns"]`, falling back to
    `logs["per_worker_returns"]` when the first is missing or empty.
    A dict is ordered by sorted key; any other iterable keeps its order.
    Returns None when neither entry yields a value.
    """
    raw = logs.get("worker_returns") or logs.get("per_worker_returns")
    if raw is None:
        return None
    if isinstance(raw, dict):
        # Sorted worker ids give a stable order.
        sequences = (raw[worker] for worker in sorted(raw))
    else:
        sequences = raw
    return [[float(value) for value in seq] for seq in sequences]
def train_once(
    run_id: str,
    *,
    n_workers: int = 4,
    total_env_steps: int = 200_000,
    T: int = 20,
    gamma: float = 0.99,
    entropy_coef: float = 0.02,
    value_coef: float = 0.25,
    max_grad_norm: float = 0.5,
    lr: float = 7e-4,
    log_every: int = 10_000
):
    """
    Run one full training job and save its learning-curve plots.

    Training is delegated to `train_a2c_multi` (all keyword arguments are
    forwarded); the checkpoint goes to `CKPT_DIR/a2c_<run_id>.pth`. Up to
    four figures are written to `ART_DIR`, each with `run_id` in its name:
    the last-500-episode curve, the full-history curve, per-worker curves,
    and the average across workers.

    Returns:
        `(model, logs, TrainRunPaths)`; the paths object carries the
        checkpoint path and the last-500 plot path.
    """
    ckpt_path = os.path.join(CKPT_DIR, f"a2c_{run_id}.pth")
    print(f"[Run {run_id}] starting training…")
    model, logs = train_a2c_multi(
        n_workers=n_workers,
        total_env_steps=total_env_steps,
        T=T,
        gamma=gamma,
        entropy_coef=entropy_coef,
        value_coef=value_coef,
        max_grad_norm=max_grad_norm,
        lr=lr,
        log_every=log_every,
        save_path=ckpt_path,
    )

    # Plot: last-500 (tail view)
    ep_returns = logs.get("global_returns", [])
    ma20 = _moving_average(ep_returns, 20)
    plt.figure(figsize=(8, 4.2))
    plt.plot(ep_returns, linewidth=1.0, label="Episode return (last 500)")
    plt.plot(ma20, linewidth=1.8, label="MA(20)")
    plt.title("A2C (multi-worker) - Training returns (final 500 episodes)")
    plt.xlabel("Episodes")
    plt.ylabel("Return")
    plt.legend()
    plt.tight_layout()
    train_plot_path = os.path.join(ART_DIR, f"train_curve_{run_id}.png")
    plt.savefig(train_plot_path, dpi=150)
    plt.show()

    # Plot: FULL history (global); falls back to the tail if no full history exists
    full_hist = logs.get("global_returns_all") or ep_returns
    ma20_full = _moving_average(full_hist, 20)
    plt.figure(figsize=(8, 4.2))
    plt.plot(full_hist, linewidth=0.9, label="Episode return (full)")
    plt.plot(ma20_full, linewidth=1.8, label="MA(20)")
    plt.title("A2C (multi-worker) - Training returns")
    plt.xlabel("Episodes")
    plt.ylabel("Return")
    plt.legend()
    plt.tight_layout()
    train_plot_full_path = os.path.join(ART_DIR, f"train_curve_full_{run_id}.png")
    plt.savefig(train_plot_full_path, dpi=150)
    plt.show()

    # Plot: per-worker curves (+ MA)
    per_worker = _normalize_worker_returns(logs)
    per_worker_path = None
    if per_worker and any(len(seq) > 0 for seq in per_worker):
        plt.figure(figsize=(8, 4.2))
        for wid, seq in enumerate(per_worker):
            plt.plot(seq, linewidth=0.9, alpha=0.85, label=f"Worker {wid}")
            plt.plot(_moving_average(seq, 20), linewidth=1.6, alpha=0.9, label=f"Worker {wid} MA(20)")
        plt.title("A2C - Reward per episode per worker")
        plt.xlabel("Episodes (per-worker)")
        plt.ylabel("Return")
        plt.legend(ncols=2, fontsize=9)
        plt.tight_layout()
        per_worker_path = os.path.join(ART_DIR, f"train_curve_workers_{run_id}.png")
        plt.savefig(per_worker_path, dpi=150)
        plt.show()

    # Plot: average across workers.
    # Only workers that finished at least one episode take part: stacking an
    # empty series next to non-empty ones would fail on mismatched shapes,
    # and an average needs at least two series to be meaningful.
    avg_workers_path = None
    finished = [seq for seq in (per_worker or []) if len(seq) > 0]
    if len(finished) >= 2:
        # Truncate to the shortest series so episode k is averaged over all workers.
        min_len = min(len(seq) for seq in finished)
        stacked = np.stack(
            [np.asarray(seq[:min_len], dtype=np.float32) for seq in finished],
            axis=0,
        )
        avg_across = stacked.mean(axis=0).tolist()
        plt.figure(figsize=(8, 4.2))
        plt.plot(avg_across, linewidth=1.6, label="Average across workers")
        plt.plot(_moving_average(avg_across, 20), linewidth=1.6, label="MA(20)")
        plt.title("A2C - Average reward per episode across workers")
        plt.xlabel("Episodes")
        plt.ylabel("Return")
        plt.legend()
        plt.tight_layout()
        avg_workers_path = os.path.join(ART_DIR, f"train_curve_workers_avg_{run_id}.png")
        plt.savefig(avg_workers_path, dpi=150)
        plt.show()

    print(f"[Run {run_id}] checkpoint: {ckpt_path}")
    print(f"[Run {run_id}] training plot (tail 500): {train_plot_path}")
    print(f"[Run {run_id}] training plot (full): {train_plot_full_path}")
    if per_worker_path:
        print(f"[Run {run_id}] per-worker plot: {per_worker_path}")
    if avg_workers_path:
        print(f"[Run {run_id}] workers-average plot: {avg_workers_path}")
    return model, logs, TrainRunPaths(ckpt_path=ckpt_path, train_plot_path=train_plot_path)
10. Ten-episode greedy evaluation¶
Loads the saved checkpoint, rebuilds the exact same model, and runs ten deterministic greedy episodes with fixed seeds. It reports summary metrics (mean, std, min, max), saves a small CSV with per-episode returns, and generates a simple evaluation plot.
@dataclass
class EvalRunPaths:
    """File locations produced by one evaluation run."""

    # Path of the per-episode CSV file.
    eval_csv_path: str
    # Path of the saved evaluation plot image.
    eval_plot_path: str
def evaluate_10(run_id: str, ckpt_path: str):
    """Run ten greedy evaluation episodes from a saved checkpoint.

    The checkpoint is loaded with ``load_checkpoint`` and an ``ActorCritic``
    is rebuilt from its stored ``model_cfg``. If the checkpoint payload holds
    an ``rng_state`` entry it is restored before the rollouts. Episode ``ep``
    uses seed ``SEED + ep`` for both environment creation and ``reset``.

    Side effects: writes ``eval10_{run_id}.csv`` (columns ``episode_idx``,
    ``seed``, ``return``) and ``eval10_{run_id}.png`` under ``ART_DIR``,
    shows the plot, and prints a summary.

    Args:
        run_id: Identifier embedded in the artifact file names and log lines.
        ckpt_path: Path of the checkpoint to evaluate.

    Returns:
        A ``(metrics, paths)`` tuple: ``metrics`` is a dict with the keys
        ``mean``, ``std``, ``min`` and ``max`` over the episode returns
        (``std`` is NumPy's default population standard deviation), and
        ``paths`` is an ``EvalRunPaths`` pointing at the CSV and the plot.
    """
    # Load checkpoint and rebuild model accordingly
    loaded = load_checkpoint(ckpt_path)
    cfg = loaded["model_cfg"]
    eval_model = ActorCritic(cfg["obs_dim"], cfg["n_actions"], tuple(cfg["hidden_sizes"])).to(device)
    eval_model.load_state_dict(loaded["state_dict"], strict=True)
    eval_model.eval()

    # Restore RNG for eval reproducibility
    if loaded["payload"] and "rng_state" in loaded["payload"]:
        _rng_restore(loaded["payload"]["rng_state"])

    returns, seeds = [], []
    for ep in range(10):
        seed_ep = SEED + ep
        env = make_eval_env(ENV_ID, base_seed=seed_ep)
        # try/finally guarantees the environment is released even when the
        # rollout raises (e.g. inside step() or greedy_action()).
        try:
            obs, _ = env.reset(seed=seed_ep)
            done = False
            total_r = 0.0
            while not done:
                a = eval_model.greedy_action(obs)
                obs, r, terminated, truncated, _ = env.step(a)
                total_r += float(r)
                done = bool(terminated or truncated)
        finally:
            env.close()
        returns.append(total_r)
        seeds.append(seed_ep)

    # Save CSV with seed column; record_best_from_eval reads this file back
    # to recover the seed of the best episode.
    eval_csv_path = os.path.join(ART_DIR, f"eval10_{run_id}.csv")
    with open(eval_csv_path, "w", newline="") as f:
        w = csv.DictWriter(f, fieldnames=["episode_idx", "seed", "return"])
        w.writeheader()
        for i, (seed_ep, ret) in enumerate(zip(seeds, returns)):
            w.writerow({"episode_idx": i, "seed": seed_ep, "return": ret})

    # Plot and save
    plt.figure(figsize=(7.5, 4.0))
    plt.plot(returns, marker="o", linewidth=1.0)
    plt.title("A2C - Greedy evaluation (10 episodes)")
    plt.xlabel("Episode")
    plt.ylabel("Return")
    plt.tight_layout()
    eval_plot_path = os.path.join(ART_DIR, f"eval10_{run_id}.png")
    plt.savefig(eval_plot_path, dpi=150)
    plt.show()

    metrics = {
        "mean": float(np.mean(returns)) if returns else float("nan"),
        "std": float(np.std(returns)) if returns else float("nan"),
        "min": float(np.min(returns)) if returns else float("nan"),
        "max": float(np.max(returns)) if returns else float("nan"),
    }
    print(f"[Eval {run_id}] mean={metrics['mean']:.2f} std={metrics['std']:.2f} "
          f"min={metrics['min']:.2f} max={metrics['max']:.2f}")
    print(f"[Eval {run_id}] CSV: {eval_csv_path}")
    print(f"[Eval {run_id}] plot: {eval_plot_path}")
    return metrics, EvalRunPaths(eval_csv_path=eval_csv_path, eval_plot_path=eval_plot_path)
11. One-episode greedy video¶
Loads the same checkpoint and records a single greedy episode using the video wrapper. The environment is configured for frame capture, the episode is run without exploration, and the resulting MP4 is saved in the videos folder. The console prints the episode return and the directory where the video was written, with run_id embedded in the filename to keep each experiment’s recording separate.
def record_one_video(run_id: str, ckpt_path: str, seed: int):
    """Record one greedy episode of a checkpointed policy as a video.

    The checkpoint is loaded with ``load_checkpoint`` and an ``ActorCritic``
    is rebuilt from its stored ``model_cfg``. If the checkpoint payload holds
    an ``rng_state`` entry it is restored before the rollout. The episode is
    run in the environment returned by ``make_video_env`` and its return is
    printed.

    Args:
        run_id: Identifier passed to ``make_video_env`` and used in log lines.
        ckpt_path: Path of the checkpoint to load.
        seed: Seed used both as ``base_seed`` for the video environment and
            for its ``reset`` call.

    Returns:
        ``VID_DIR``, the video root handed to ``make_video_env``.
        NOTE(review): the exact file/sub-folder layout below that root is
        decided by ``make_video_env``, which is not visible here.
    """
    loaded = load_checkpoint(ckpt_path)
    cfg = loaded["model_cfg"]
    model = ActorCritic(cfg["obs_dim"], cfg["n_actions"], tuple(cfg["hidden_sizes"])).to(device)
    model.load_state_dict(loaded["state_dict"], strict=True)
    model.eval()

    if loaded["payload"] and "rng_state" in loaded["payload"]:
        _rng_restore(loaded["payload"]["rng_state"])

    video_env = make_video_env(env_id=ENV_ID, video_root=VID_DIR, run_id=run_id, base_seed=seed)
    # try/finally guarantees the recording environment is closed even when
    # the rollout raises, so the recorder is never left open.
    try:
        obs, _ = video_env.reset(seed=seed)
        done = False
        total_r = 0.0
        while not done:
            a = model.greedy_action(obs)
            obs, r, terminated, truncated, _ = video_env.step(a)
            total_r += float(r)
            done = bool(terminated or truncated)
    finally:
        video_env.close()

    print(f"[Video {run_id}] episode return={total_r:.2f}")
    print(f"[Video {run_id}] saved under: {VID_DIR}")
    return VID_DIR
12. Find the best landing¶
Selects the strongest episode from the fixed 10-episode evaluation and records exactly that trajectory as a video. The helper reads the saved evaluation CSV for this run, identifies the episode with the highest return, recovers its seed, and invokes the existing video recorder with that seed. The result is a reproducible MP4 of the best landing produced by the trained policy for this run, without manual searching or rerunning episodes.
def record_best_from_eval(run_id: str, ckpt_path: str, art_dir: str = ART_DIR):
    """Record a video of the highest-return episode listed in the eval CSV.

    Reads ``eval10_{run_id}.csv`` from ``art_dir``, picks the row with the
    largest ``return`` value, and calls ``record_one_video`` with that row's
    seed.

    Args:
        run_id: Identifier used to locate the CSV and passed to the recorder.
        ckpt_path: Path of the checkpoint handed to ``record_one_video``.
        art_dir: Directory containing the evaluation CSV.

    Returns:
        A ``(video_dir, best_idx, best_ret)`` tuple: the value returned by
        ``record_one_video``, the episode index of the selected row, and
        its return.

    Raises:
        RuntimeError: If the CSV contains no data rows.
    """
    eval_csv = os.path.join(art_dir, f"eval10_{run_id}.csv")
    with open(eval_csv, "r", newline="") as handle:
        records = [record for record in csv.DictReader(handle)]
    if len(records) == 0:
        raise RuntimeError(f"No rows found in {eval_csv}")

    def _episode_return(record):
        # CSV values are read as strings, so convert before comparing.
        return float(record["return"])

    top_row = max(records, key=_episode_return)
    best_idx = int(top_row["episode_idx"])
    best_seed = int(top_row["seed"])
    best_ret = float(top_row["return"])
    print(f"[Best] ep={best_idx} return={best_ret:.2f} seed={best_seed}")

    # Replaying with the stored seed records the selected evaluation episode.
    video_dir = record_one_video(run_id, ckpt_path, seed=best_seed)
    return video_dir, best_idx, best_ret
Training + Plots¶
Run#1
run_id = f"run1_seed{SEED}"

# Run #1 hyperparameters, collected in one mapping and unpacked into train_once.
train_kwargs = {
    "n_workers": 2,
    "total_env_steps": 200_000,
    "T": 20,
    "gamma": 0.99,
    "entropy_coef": 0.02,
    "value_coef": 0.25,
    "max_grad_norm": 0.5,
    "lr": 7e-4,
    "log_every": 20_000,
}

# Multi-worker training; yields the model, the logs and the artifact paths.
model, logs, paths = train_once(run_id=run_id, **train_kwargs)

# Greedy evaluation over ten fixed-seed episodes.
metrics, eval_paths = evaluate_10(run_id, paths.ckpt_path)

# Replay the top evaluation episode by its seed and record it as a video.
video_dir, best_idx, best_ret = record_best_from_eval(run_id, paths.ckpt_path)

print(f"{run_id} | mean={metrics['mean']:.1f}±{metrics['std']:.1f} | best_ep={best_idx}, best_ret={best_ret:.1f}")
[Run run1_seed1227] starting training… [A2C][sync] start: workers=2, T=20, target_steps=200000, mp=fork [A2C][sync] it= 1 steps= 40 (+ 40) avg10= nan loss=18.817 pg=-0.000 vf=75.380 H=1.386 gn=6.097 [worker 0] episodes_seen=10 last_return=-126.8 (+1 eps) [worker 1] episodes_seen=10 last_return=-164.4 (+1 eps) [worker 0] episodes_seen=20 last_return=-323.2 (+1 eps) [worker 1] episodes_seen=20 last_return=-207.4 (+1 eps) [worker 1] episodes_seen=30 last_return=-118.9 (+1 eps) [worker 0] episodes_seen=30 last_return=-270.3 (+1 eps) [worker 0] episodes_seen=40 last_return=-153.5 (+1 eps) [worker 1] episodes_seen=40 last_return=-280.3 (+1 eps) [worker 1] episodes_seen=50 last_return=-108.2 (+1 eps) [worker 0] episodes_seen=50 last_return=-128.4 (+1 eps) [worker 0] episodes_seen=60 last_return=-281.5 (+1 eps) [worker 1] episodes_seen=60 last_return=-139.9 (+1 eps) [worker 1] episodes_seen=70 last_return=-328.0 (+1 eps) [worker 0] episodes_seen=70 last_return=-241.7 (+1 eps) [A2C][sync] it= 501 steps= 20040 (+ 40) avg10=-213.38 loss=1451.896 pg=-0.000 vf=5807.583 H=0.000 gn=16571.365 [worker 1] episodes_seen=80 last_return=-221.6 (+1 eps) [worker 0] episodes_seen=80 last_return=-113.7 (+1 eps) [worker 1] episodes_seen=90 last_return=-222.3 (+1 eps) [worker 0] episodes_seen=90 last_return=-179.5 (+1 eps) [worker 1] episodes_seen=100 last_return=-294.1 (+1 eps) [worker 0] episodes_seen=100 last_return=-200.4 (+1 eps) [worker 0] episodes_seen=110 last_return=-241.6 (+1 eps) [worker 1] episodes_seen=110 last_return=-127.6 (+1 eps) [worker 0] episodes_seen=120 last_return=-309.6 (+1 eps) [worker 1] episodes_seen=120 last_return=-190.8 (+1 eps) [worker 0] episodes_seen=130 last_return=-194.0 (+1 eps) [worker 1] episodes_seen=130 last_return=-209.3 (+1 eps) [worker 0] episodes_seen=140 last_return=-228.4 (+1 eps) [worker 1] episodes_seen=140 last_return=-248.9 (+1 eps) [worker 0] episodes_seen=150 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=160 last_return=-168.6 (+1 
eps) [worker 1] episodes_seen=150 last_return=-269.6 (+1 eps) [A2C][sync] it= 1001 steps= 40040 (+ 40) avg10=-169.45 loss=1319.658 pg=-0.000 vf=5278.634 H=0.000 gn=23632.676 [worker 1] episodes_seen=160 last_return=-113.7 (+1 eps) [worker 0] episodes_seen=170 last_return=-285.0 (+1 eps) [worker 1] episodes_seen=170 last_return=-454.8 (+1 eps) [worker 0] episodes_seen=180 last_return=-180.2 (+1 eps) [worker 1] episodes_seen=180 last_return=-139.8 (+1 eps) [worker 0] episodes_seen=190 last_return=-225.0 (+1 eps) [worker 1] episodes_seen=190 last_return=-225.9 (+1 eps) [worker 0] episodes_seen=200 last_return=-183.0 (+1 eps) [worker 1] episodes_seen=200 last_return=-163.4 (+1 eps) [worker 0] episodes_seen=210 last_return=-149.6 (+1 eps) [worker 0] episodes_seen=220 last_return=-128.4 (+1 eps) [worker 1] episodes_seen=210 last_return=-301.6 (+1 eps) [worker 0] episodes_seen=230 last_return=-252.8 (+1 eps) [worker 1] episodes_seen=220 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=240 last_return=-115.6 (+1 eps) [worker 1] episodes_seen=230 last_return=-157.5 (+1 eps) [A2C][sync] it= 1501 steps= 60040 (+ 40) avg10=-166.13 loss=18042.904 pg=-0.000 vf=72171.617 H=0.000 gn=23063.143 [worker 0] episodes_seen=250 last_return=-145.1 (+1 eps) [worker 1] episodes_seen=240 last_return=-186.1 (+1 eps) [worker 0] episodes_seen=260 last_return=-154.5 (+1 eps) [worker 1] episodes_seen=250 last_return=-243.7 (+1 eps) [worker 0] episodes_seen=270 last_return=-141.4 (+1 eps) [worker 1] episodes_seen=260 last_return=-226.8 (+1 eps) [worker 0] episodes_seen=280 last_return=-145.4 (+1 eps) [worker 1] episodes_seen=270 last_return=-110.3 (+1 eps) [worker 0] episodes_seen=290 last_return=-186.5 (+1 eps) [worker 1] episodes_seen=280 last_return=-156.8 (+1 eps) [worker 0] episodes_seen=300 last_return=-156.7 (+1 eps) [worker 1] episodes_seen=290 last_return=-258.1 (+1 eps) [worker 0] episodes_seen=310 last_return=-139.9 (+1 eps) [worker 1] episodes_seen=300 last_return=-130.4 (+1 eps) 
[worker 0] episodes_seen=320 last_return=-146.2 (+1 eps) [worker 1] episodes_seen=310 last_return=-173.2 (+1 eps) [A2C][sync] it= 2001 steps= 80040 (+ 40) avg10=-183.16 loss=9374.838 pg=0.007 vf=37499.324 H=0.067 gn=53402.223 [worker 0] episodes_seen=330 last_return=-236.9 (+1 eps) [worker 1] episodes_seen=320 last_return=-126.0 (+1 eps) [worker 0] episodes_seen=340 last_return=-129.6 (+1 eps) [worker 1] episodes_seen=330 last_return=-196.2 (+1 eps) [worker 0] episodes_seen=350 last_return=-250.7 (+1 eps) [worker 1] episodes_seen=340 last_return=-132.4 (+1 eps) [worker 0] episodes_seen=360 last_return=-244.5 (+1 eps) [worker 1] episodes_seen=350 last_return=-170.3 (+1 eps) [worker 0] episodes_seen=370 last_return=-153.5 (+1 eps) [worker 1] episodes_seen=360 last_return=-109.4 (+1 eps) [worker 0] episodes_seen=380 last_return=-262.3 (+1 eps) [worker 1] episodes_seen=370 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=390 last_return=-148.5 (+1 eps) [worker 1] episodes_seen=380 last_return=-196.7 (+1 eps) [worker 0] episodes_seen=400 last_return=-272.8 (+1 eps) [worker 1] episodes_seen=390 last_return=-181.4 (+1 eps) [worker 0] episodes_seen=410 last_return=-293.4 (+1 eps) [A2C][sync] it= 2501 steps= 100040 (+ 40) avg10=-197.08 loss=2952.766 pg=-0.000 vf=11811.064 H=0.000 gn=39910.930 [worker 1] episodes_seen=400 last_return=-209.3 (+1 eps) [worker 0] episodes_seen=420 last_return=-244.1 (+1 eps) [worker 1] episodes_seen=410 last_return=-216.5 (+1 eps) [worker 0] episodes_seen=430 last_return=-169.3 (+1 eps) [worker 1] episodes_seen=420 last_return=-233.2 (+1 eps) [worker 0] episodes_seen=440 last_return=-187.4 (+1 eps) [worker 1] episodes_seen=430 last_return=-237.2 (+1 eps) [worker 0] episodes_seen=450 last_return=-111.8 (+1 eps) [worker 1] episodes_seen=440 last_return=-272.0 (+1 eps) [worker 0] episodes_seen=460 last_return=-111.2 (+1 eps) [worker 1] episodes_seen=450 last_return=-142.0 (+1 eps) [worker 0] episodes_seen=470 last_return=-125.4 (+1 eps) 
[worker 1] episodes_seen=460 last_return=-146.5 (+1 eps) [A2C][sync] it= 3001 steps= 120040 (+ 40) avg10=-211.48 loss=33358.914 pg=-0.000 vf=133435.656 H=0.000 gn=106291.344 [worker 0] episodes_seen=480 last_return=-178.6 (+1 eps) [worker 1] episodes_seen=470 last_return=-189.1 (+1 eps) [worker 1] episodes_seen=480 last_return=-116.7 (+1 eps) [worker 0] episodes_seen=490 last_return=-190.4 (+1 eps) [worker 0] episodes_seen=500 last_return=-215.5 (+1 eps) [worker 1] episodes_seen=490 last_return=-144.1 (+1 eps) [worker 0] episodes_seen=510 last_return=-163.7 (+1 eps) [worker 1] episodes_seen=500 last_return=-245.2 (+1 eps) [worker 0] episodes_seen=520 last_return=-198.7 (+1 eps) [worker 1] episodes_seen=510 last_return=-259.1 (+1 eps) [worker 0] episodes_seen=530 last_return=-107.8 (+1 eps) [worker 1] episodes_seen=520 last_return=-132.1 (+1 eps) [worker 0] episodes_seen=540 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=530 last_return=-218.8 (+1 eps) [worker 0] episodes_seen=550 last_return=-144.5 (+1 eps) [worker 1] episodes_seen=540 last_return=-150.6 (+1 eps) [A2C][sync] it= 3501 steps= 140040 (+ 40) avg10=-181.38 loss=27016.354 pg=0.105 vf=108065.000 H=0.052 gn=445187.500 [worker 0] episodes_seen=560 last_return=-195.8 (+1 eps) [worker 1] episodes_seen=550 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=570 last_return=-261.4 (+1 eps) [worker 1] episodes_seen=560 last_return=-202.5 (+1 eps) [worker 0] episodes_seen=580 last_return=-307.1 (+1 eps) [worker 1] episodes_seen=570 last_return=-132.5 (+1 eps) [worker 0] episodes_seen=590 last_return=-167.2 (+1 eps) [worker 1] episodes_seen=580 last_return=-134.3 (+1 eps) [worker 0] episodes_seen=600 last_return=-241.9 (+1 eps) [worker 1] episodes_seen=590 last_return=-230.3 (+1 eps) [worker 0] episodes_seen=610 last_return=-179.9 (+1 eps) [worker 1] episodes_seen=600 last_return=-215.8 (+1 eps) [worker 0] episodes_seen=620 last_return=-117.1 (+1 eps) [worker 1] episodes_seen=610 last_return=-155.7 (+1 eps) 
[worker 0] episodes_seen=630 last_return=-131.4 (+1 eps) [worker 1] episodes_seen=620 last_return=-109.3 (+1 eps) [A2C][sync] it= 4001 steps= 160040 (+ 40) avg10=-161.27 loss=10904.596 pg=-0.000 vf=43618.383 H=0.000 gn=72747.453 [worker 0] episodes_seen=640 last_return=-288.5 (+1 eps) [worker 1] episodes_seen=630 last_return=-205.4 (+1 eps) [worker 1] episodes_seen=640 last_return=-307.2 (+1 eps) [worker 0] episodes_seen=650 last_return=-471.1 (+1 eps) [worker 1] episodes_seen=650 last_return=-159.5 (+1 eps) [worker 0] episodes_seen=660 last_return=-185.8 (+1 eps) [worker 1] episodes_seen=660 last_return=-281.9 (+1 eps) [worker 0] episodes_seen=670 last_return=-150.4 (+1 eps) [worker 1] episodes_seen=670 last_return=-254.3 (+1 eps) [worker 0] episodes_seen=680 last_return=-198.3 (+1 eps) [worker 1] episodes_seen=680 last_return=-240.8 (+1 eps) [worker 0] episodes_seen=690 last_return=-127.6 (+1 eps) [worker 1] episodes_seen=690 last_return=-280.0 (+1 eps) [worker 0] episodes_seen=700 last_return=-385.9 (+1 eps) [worker 1] episodes_seen=700 last_return=-232.3 (+1 eps) [worker 0] episodes_seen=710 last_return=-140.7 (+1 eps) [worker 1] episodes_seen=710 last_return=-254.5 (+1 eps) [worker 0] episodes_seen=720 last_return=-274.4 (+1 eps) [worker 1] episodes_seen=720 last_return=-283.4 (+1 eps) [worker 0] episodes_seen=730 last_return=-168.6 (+1 eps) [worker 1] episodes_seen=730 last_return=-152.0 (+1 eps) [A2C][sync] it= 4501 steps= 180040 (+ 40) avg10=-177.39 loss=41974.172 pg=-0.000 vf=167896.688 H=0.000 gn=83903.234 [worker 0] episodes_seen=740 last_return=-124.9 (+1 eps) [worker 1] episodes_seen=740 last_return=-213.0 (+1 eps) [worker 0] episodes_seen=750 last_return=-188.5 (+1 eps) [worker 1] episodes_seen=750 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=760 last_return=-264.0 (+1 eps) [worker 1] episodes_seen=760 last_return=-316.2 (+1 eps) [worker 0] episodes_seen=770 last_return=-225.1 (+1 eps) [worker 1] episodes_seen=770 last_return=-184.7 (+1 eps) 
[worker 0] episodes_seen=780 last_return=-152.8 (+1 eps) [worker 1] episodes_seen=780 last_return=-117.7 (+1 eps) [worker 0] episodes_seen=790 last_return=-275.4 (+1 eps) [worker 1] episodes_seen=790 last_return=-205.0 (+1 eps) [worker 0] episodes_seen=800 last_return=-260.7 (+1 eps) [worker 1] episodes_seen=800 last_return=-159.3 (+1 eps) [worker 0] episodes_seen=810 last_return=-231.8 (+1 eps) [worker 1] episodes_seen=810 last_return=-97.0 (+1 eps) [worker 0] episodes_seen=820 last_return=-165.3 (+1 eps) [worker 1] episodes_seen=820 last_return=-306.0 (+1 eps) [worker 0] episodes_seen=830 last_return=-262.7 (+1 eps) [worker 1] episodes_seen=830 last_return=-90.6 (+1 eps) [worker 0] episodes_seen=840 last_return=-110.1 (+1 eps) [worker 1] episodes_seen=840 last_return=-200.0 (+1 eps) [worker 0] episodes_seen=850 last_return=-113.5 (+1 eps) [A2C][sync] it= 5000 steps= 200000 (+ 40) avg10=-230.52 loss=41490.852 pg=-0.000 vf=165963.406 H=0.000 gn=39695.812 [Checkpoint] Saved self-describing checkpoint → part2_artifacts/checkpoints/a2c_run1_seed1227.pth [A2C][sync] done: steps=200000 time=233.3s avg10=-230.52
[Run run1_seed1227] checkpoint: part2_artifacts/checkpoints/a2c_run1_seed1227.pth [Run run1_seed1227] training plot (tail 500): part2_artifacts/train_curve_run1_seed1227.png [Run run1_seed1227] training plot (full): part2_artifacts/train_curve_full_run1_seed1227.png [Run run1_seed1227] per-worker plot: part2_artifacts/train_curve_workers_run1_seed1227.png [Run run1_seed1227] workers-average plot: part2_artifacts/train_curve_workers_avg_run1_seed1227.png
[Eval run1_seed1227] mean=-649.29 std=129.78 min=-856.11 max=-452.81 [Eval run1_seed1227] CSV: part2_artifacts/eval10_run1_seed1227.csv [Eval run1_seed1227] plot: part2_artifacts/eval10_run1_seed1227.png [Best] ep=8 return=-452.81 seed=1235
/usr/local/lib/python3.12/dist-packages/gymnasium/wrappers/rendering.py:293: UserWarning: WARN: Overwriting existing videos at /content/part2_artifacts/videos/run1_seed1227 folder (try specifying a different `video_folder` for the `RecordVideo` wrapper if this is not desired)
logger.warn(
[Video run1_seed1227] episode return=-452.81 [Video run1_seed1227] saved under: part2_artifacts/videos run1_seed1227 | mean=-649.3±129.8 | best_ep=8, best_ret=-452.8
Run#2
run_id = f"run2_seed{SEED}"

# Run #2 hyperparameters, collected in one mapping and unpacked into train_once.
train_kwargs = {
    "n_workers": 2,
    "total_env_steps": 300_000,
    "T": 10,
    "gamma": 0.99,
    "entropy_coef": 0.03,
    "value_coef": 0.50,
    "max_grad_norm": 0.5,
    "lr": 7e-4,
    "log_every": 30_000,
}

# Multi-worker training; yields the model, the logs and the artifact paths.
model, logs, paths = train_once(run_id=run_id, **train_kwargs)

# Greedy evaluation over ten fixed-seed episodes.
metrics, eval_paths = evaluate_10(run_id, paths.ckpt_path)

# Replay the top evaluation episode by its seed and record it as a video.
video_dir, best_idx, best_ret = record_best_from_eval(run_id, paths.ckpt_path)

print(f"{run_id} | mean={metrics['mean']:.1f}±{metrics['std']:.1f} | best_ep={best_idx}, best_ret={best_ret:.1f}")
[Run run2_seed1227] starting training… [A2C][sync] start: workers=2, T=10, target_steps=300000, mp=fork [A2C][sync] it= 1 steps= 20 (+ 20) avg10= nan loss=32.429 pg=0.000 vf=64.941 H=1.386 gn=6.182 [worker 1] episodes_seen=10 last_return=-109.2 (+1 eps) [worker 0] episodes_seen=10 last_return=-148.6 (+1 eps) [worker 1] episodes_seen=20 last_return=-155.2 (+1 eps) [worker 0] episodes_seen=20 last_return=-184.7 (+1 eps) [worker 1] episodes_seen=30 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=30 last_return=-144.7 (+1 eps) [worker 1] episodes_seen=40 last_return=-147.4 (+1 eps) [worker 0] episodes_seen=40 last_return=-134.7 (+1 eps) [worker 1] episodes_seen=50 last_return=-179.4 (+1 eps) [worker 0] episodes_seen=50 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=60 last_return=-132.4 (+1 eps) [worker 0] episodes_seen=60 last_return=-111.0 (+1 eps) [worker 1] episodes_seen=70 last_return=-140.4 (+1 eps) [worker 0] episodes_seen=70 last_return=-152.2 (+1 eps) [worker 1] episodes_seen=80 last_return=-192.5 (+1 eps) [worker 0] episodes_seen=80 last_return=-173.0 (+1 eps) [worker 1] episodes_seen=90 last_return=-137.2 (+1 eps) [worker 0] episodes_seen=90 last_return=-132.8 (+1 eps) [worker 1] episodes_seen=100 last_return=-211.9 (+1 eps) [worker 0] episodes_seen=100 last_return=-136.8 (+1 eps) [worker 1] episodes_seen=110 last_return=-135.4 (+1 eps) [worker 0] episodes_seen=110 last_return=-136.5 (+1 eps) [A2C][sync] it= 1501 steps= 30020 (+ 20) avg10=-132.81 loss=10064.079 pg=0.000 vf=20128.158 H=0.000 gn=42307.453 [worker 1] episodes_seen=120 last_return=-159.5 (+1 eps) [worker 0] episodes_seen=120 last_return=-160.0 (+1 eps) [worker 1] episodes_seen=130 last_return=-129.1 (+1 eps) [worker 0] episodes_seen=130 last_return=-113.8 (+1 eps) [worker 1] episodes_seen=140 last_return=-151.5 (+1 eps) [worker 0] episodes_seen=140 last_return=-123.9 (+1 eps) [worker 1] episodes_seen=150 last_return=-110.4 (+1 eps) [worker 0] episodes_seen=150 last_return=-105.9 (+1 
eps) [worker 1] episodes_seen=160 last_return=-132.6 (+1 eps) [worker 0] episodes_seen=160 last_return=-169.8 (+1 eps) [worker 1] episodes_seen=170 last_return=-107.2 (+1 eps) [worker 0] episodes_seen=170 last_return=-160.2 (+1 eps) [worker 1] episodes_seen=180 last_return=-86.9 (+1 eps) [worker 0] episodes_seen=180 last_return=-226.1 (+1 eps) [worker 1] episodes_seen=190 last_return=-132.9 (+1 eps) [worker 0] episodes_seen=190 last_return=-163.8 (+1 eps) [worker 1] episodes_seen=200 last_return=-139.9 (+1 eps) [worker 0] episodes_seen=200 last_return=-93.3 (+1 eps) [worker 1] episodes_seen=210 last_return=-145.4 (+1 eps) [worker 0] episodes_seen=210 last_return=-79.2 (+1 eps) [worker 1] episodes_seen=220 last_return=-86.1 (+1 eps) [worker 0] episodes_seen=220 last_return=-91.3 (+1 eps) [worker 0] episodes_seen=230 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=230 last_return=-67.6 (+1 eps) [worker 0] episodes_seen=240 last_return=-114.0 (+1 eps) [worker 1] episodes_seen=240 last_return=-131.2 (+1 eps) [A2C][sync] it= 3001 steps= 60020 (+ 20) avg10=-111.88 loss=734.386 pg=-0.377 vf=1469.538 H=0.232 gn=1985.358 [worker 0] episodes_seen=250 last_return=-135.3 (+1 eps) [worker 1] episodes_seen=250 last_return=-179.3 (+1 eps) [worker 0] episodes_seen=260 last_return=-111.1 (+1 eps) [worker 1] episodes_seen=260 last_return=-128.7 (+1 eps) [worker 1] episodes_seen=270 last_return=-86.6 (+1 eps) [worker 0] episodes_seen=270 last_return=-76.6 (+1 eps) [worker 0] episodes_seen=280 last_return=-98.1 (+1 eps) [worker 1] episodes_seen=280 last_return=100.0 (+1 eps) [worker 1] episodes_seen=290 last_return=-84.5 (+1 eps) [worker 0] episodes_seen=290 last_return=-68.6 (+1 eps) [worker 0] episodes_seen=300 last_return=-80.3 (+1 eps) [worker 1] episodes_seen=300 last_return=-84.7 (+1 eps) [worker 0] episodes_seen=310 last_return=-91.7 (+1 eps) [worker 1] episodes_seen=310 last_return=-100.6 (+1 eps) [worker 0] episodes_seen=320 last_return=-93.9 (+1 eps) [worker 1] 
episodes_seen=320 last_return=-96.8 (+1 eps) [A2C][sync] it= 4501 steps= 90020 (+ 20) avg10= -63.08 loss=426.495 pg=-0.077 vf=853.171 H=0.455 gn=382.022 [worker 0] episodes_seen=330 last_return=-95.2 (+1 eps) [worker 1] episodes_seen=330 last_return=-68.9 (+1 eps) [worker 0] episodes_seen=340 last_return=-115.5 (+1 eps) [worker 1] episodes_seen=340 last_return=-93.3 (+1 eps) [worker 0] episodes_seen=350 last_return=-59.4 (+1 eps) [worker 0] episodes_seen=360 last_return=-114.7 (+1 eps) [worker 1] episodes_seen=350 last_return=-58.9 (+1 eps) [worker 0] episodes_seen=370 last_return=-113.0 (+1 eps) [worker 1] episodes_seen=360 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=380 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=370 last_return=-88.8 (+1 eps) [worker 0] episodes_seen=390 last_return=-102.4 (+1 eps) [worker 1] episodes_seen=380 last_return=-145.4 (+1 eps) [worker 0] episodes_seen=400 last_return=-83.4 (+1 eps) [worker 1] episodes_seen=390 last_return=-91.3 (+1 eps) [worker 0] episodes_seen=410 last_return=-94.9 (+1 eps) [worker 1] episodes_seen=400 last_return=-65.4 (+1 eps) [worker 0] episodes_seen=420 last_return=-107.5 (+1 eps) [worker 1] episodes_seen=410 last_return=-197.8 (+1 eps) [worker 0] episodes_seen=430 last_return=-175.9 (+1 eps) [worker 1] episodes_seen=420 last_return=-116.8 (+1 eps) [worker 0] episodes_seen=440 last_return=-105.3 (+1 eps) [worker 1] episodes_seen=430 last_return=-96.7 (+1 eps) [worker 0] episodes_seen=450 last_return=-102.8 (+1 eps) [worker 1] episodes_seen=440 last_return=-85.5 (+1 eps) [worker 0] episodes_seen=460 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=450 last_return=-97.0 (+1 eps) [worker 0] episodes_seen=470 last_return=-152.5 (+1 eps) [worker 1] episodes_seen=460 last_return=-93.3 (+1 eps) [worker 0] episodes_seen=480 last_return=-85.5 (+1 eps) [A2C][sync] it= 6001 steps= 120020 (+ 20) avg10= -95.82 loss=9.187 pg=-0.029 vf=18.488 H=0.927 gn=268.622 [worker 1] episodes_seen=470 
last_return=-54.0 (+1 eps) [worker 0] episodes_seen=490 last_return=-70.8 (+1 eps) [worker 1] episodes_seen=480 last_return=-124.0 (+1 eps) [worker 0] episodes_seen=500 last_return=-98.1 (+1 eps) [worker 1] episodes_seen=490 last_return=-114.7 (+1 eps) [worker 0] episodes_seen=510 last_return=-75.8 (+1 eps) [worker 1] episodes_seen=500 last_return=-92.1 (+1 eps) [worker 0] episodes_seen=520 last_return=-76.7 (+1 eps) [worker 1] episodes_seen=510 last_return=-90.3 (+1 eps) [worker 0] episodes_seen=530 last_return=-82.5 (+1 eps) [worker 1] episodes_seen=520 last_return=-109.4 (+1 eps) [worker 0] episodes_seen=540 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=530 last_return=-47.3 (+1 eps) [worker 0] episodes_seen=550 last_return=-100.8 (+1 eps) [worker 1] episodes_seen=540 last_return=-91.9 (+1 eps) [worker 0] episodes_seen=560 last_return=-72.4 (+1 eps) [worker 1] episodes_seen=550 last_return=-89.4 (+1 eps) [A2C][sync] it= 7501 steps= 150020 (+ 20) avg10= -60.10 loss=1.451 pg=0.121 vf=2.709 H=0.828 gn=19.937 [worker 1] episodes_seen=560 last_return=-1.1 (+1 eps) [worker 0] episodes_seen=570 last_return=-122.7 (+1 eps) [worker 0] episodes_seen=580 last_return=10.4 (+1 eps) [worker 1] episodes_seen=570 last_return=-109.7 (+1 eps) [A2C][sync] it= 9001 steps= 180020 (+ 20) avg10= -32.40 loss=65.852 pg=-0.267 vf=132.283 H=0.766 gn=399.762 [worker 1] episodes_seen=580 last_return=-5.7 (+1 eps) [worker 0] episodes_seen=590 last_return=-98.5 (+1 eps) [worker 0] episodes_seen=600 last_return=-3.6 (+1 eps) [worker 1] episodes_seen=590 last_return=0.2 (+1 eps) [A2C][sync] it=10501 steps= 210020 (+ 20) avg10= -25.36 loss=0.340 pg=-0.130 vf=0.996 H=0.921 gn=9.764 [worker 1] episodes_seen=600 last_return=-9.8 (+1 eps) [worker 0] episodes_seen=610 last_return=-1.5 (+1 eps) [worker 1] episodes_seen=610 last_return=-61.8 (+1 eps) [worker 0] episodes_seen=620 last_return=99.8 (+1 eps) [A2C][sync] it=12001 steps= 240020 (+ 20) avg10= -24.18 loss=0.356 pg=0.151 vf=0.463 H=0.874 
gn=7.239 [worker 1] episodes_seen=620 last_return=-79.1 (+1 eps) [worker 0] episodes_seen=630 last_return=-85.0 (+1 eps) [worker 1] episodes_seen=630 last_return=-49.3 (+1 eps) [worker 0] episodes_seen=640 last_return=100.0 (+1 eps) [worker 0] episodes_seen=650 last_return=-104.7 (+1 eps) [worker 1] episodes_seen=640 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=650 last_return=-82.3 (+1 eps) [worker 0] episodes_seen=660 last_return=100.0 (+1 eps) [A2C][sync] it=13501 steps= 270020 (+ 20) avg10= 2.36 loss=1.644 pg=0.112 vf=3.121 H=0.954 gn=64.172 [worker 0] episodes_seen=670 last_return=-98.9 (+1 eps) [worker 1] episodes_seen=660 last_return=-91.4 (+1 eps) [worker 0] episodes_seen=680 last_return=-97.7 (+1 eps) [worker 1] episodes_seen=670 last_return=100.0 (+1 eps) [worker 1] episodes_seen=680 last_return=-85.7 (+1 eps) [worker 0] episodes_seen=690 last_return=-90.0 (+1 eps) [worker 1] episodes_seen=690 last_return=-104.6 (+1 eps) [worker 0] episodes_seen=700 last_return=-90.6 (+1 eps) [worker 0] episodes_seen=710 last_return=-120.8 (+1 eps) [worker 1] episodes_seen=700 last_return=-94.8 (+1 eps) [worker 1] episodes_seen=710 last_return=-86.0 (+1 eps) [worker 0] episodes_seen=720 last_return=-87.7 (+1 eps) [worker 1] episodes_seen=720 last_return=-101.6 (+1 eps) [worker 0] episodes_seen=730 last_return=-109.9 (+1 eps) [worker 1] episodes_seen=730 last_return=-91.5 (+1 eps) [A2C][sync] it=15000 steps= 300000 (+ 20) avg10= -90.81 loss=2.899 pg=-0.380 vf=6.610 H=0.872 gn=45.259 [Checkpoint] Saved self-describing checkpoint → part2_artifacts/checkpoints/a2c_run2_seed1227.pth [A2C][sync] done: steps=300000 time=438.5s avg10=-90.81
[Run run2_seed1227] checkpoint: part2_artifacts/checkpoints/a2c_run2_seed1227.pth [Run run2_seed1227] training plot (tail 500): part2_artifacts/train_curve_run2_seed1227.png [Run run2_seed1227] training plot (full): part2_artifacts/train_curve_full_run2_seed1227.png [Run run2_seed1227] per-worker plot: part2_artifacts/train_curve_workers_run2_seed1227.png [Run run2_seed1227] workers-average plot: part2_artifacts/train_curve_workers_avg_run2_seed1227.png
[Eval run2_seed1227] mean=-254.66 std=63.47 min=-336.88 max=-137.46 [Eval run2_seed1227] CSV: part2_artifacts/eval10_run2_seed1227.csv [Eval run2_seed1227] plot: part2_artifacts/eval10_run2_seed1227.png [Best] ep=3 return=-137.46 seed=1230
/usr/local/lib/python3.12/dist-packages/gymnasium/wrappers/rendering.py:293: UserWarning: WARN: Overwriting existing videos at /content/part2_artifacts/videos/run2_seed1227 folder (try specifying a different `video_folder` for the `RecordVideo` wrapper if this is not desired)
logger.warn(
[Video run2_seed1227] episode return=-137.46 [Video run2_seed1227] saved under: part2_artifacts/videos run2_seed1227 | mean=-254.7±63.5 | best_ep=3, best_ret=-137.5
Run#3
run_id = f"run3_seed{SEED}"

# Run #3 hyperparameters, collected in one mapping and unpacked into train_once.
train_kwargs = {
    "n_workers": 2,
    "total_env_steps": 400_000,
    "T": 20,
    "gamma": 0.99,
    "entropy_coef": 0.05,
    "value_coef": 0.50,
    "max_grad_norm": 0.5,
    "lr": 7e-4,
    "log_every": 40_000,
}

# Multi-worker training; yields the model, the logs and the artifact paths.
model, logs, paths = train_once(run_id=run_id, **train_kwargs)

# Greedy evaluation over ten fixed-seed episodes.
metrics, eval_paths = evaluate_10(run_id, paths.ckpt_path)

# Replay the top evaluation episode by its seed and record it as a video.
video_dir, best_idx, best_ret = record_best_from_eval(run_id, paths.ckpt_path)

print(f"{run_id} | mean={metrics['mean']:.1f}±{metrics['std']:.1f} | best_ep={best_idx}, best_ret={best_ret:.1f}")
[Run run3_seed1227] starting training… [A2C][sync] start: workers=2, T=20, target_steps=400000, mp=fork [A2C][sync] it= 1 steps= 40 (+ 40) avg10= nan loss=37.621 pg=-0.000 vf=75.380 H=1.386 gn=12.192 [worker 1] episodes_seen=10 last_return=-197.7 (+1 eps) [worker 0] episodes_seen=10 last_return=-194.0 (+1 eps) [worker 1] episodes_seen=20 last_return=-202.3 (+1 eps) [worker 0] episodes_seen=20 last_return=-144.3 (+1 eps) [worker 1] episodes_seen=30 last_return=-149.9 (+1 eps) [worker 0] episodes_seen=30 last_return=-164.3 (+1 eps) [worker 1] episodes_seen=40 last_return=-274.9 (+1 eps) [worker 0] episodes_seen=40 last_return=-168.0 (+1 eps) [worker 1] episodes_seen=50 last_return=-261.6 (+1 eps) [worker 0] episodes_seen=50 last_return=-164.1 (+1 eps) [worker 1] episodes_seen=60 last_return=-182.3 (+1 eps) [worker 0] episodes_seen=60 last_return=-176.6 (+1 eps) [worker 1] episodes_seen=70 last_return=-183.1 (+1 eps) [worker 0] episodes_seen=70 last_return=-189.8 (+1 eps) [worker 1] episodes_seen=80 last_return=-233.1 (+1 eps) [worker 0] episodes_seen=80 last_return=-271.1 (+1 eps) [worker 0] episodes_seen=90 last_return=-193.3 (+1 eps) [worker 1] episodes_seen=90 last_return=-109.5 (+1 eps) [worker 1] episodes_seen=100 last_return=-278.1 (+1 eps) [worker 0] episodes_seen=100 last_return=-211.2 (+1 eps) [worker 0] episodes_seen=110 last_return=-223.9 (+1 eps) [worker 1] episodes_seen=110 last_return=-194.5 (+1 eps) [worker 0] episodes_seen=120 last_return=-256.0 (+1 eps) [worker 1] episodes_seen=120 last_return=-136.5 (+1 eps) [worker 0] episodes_seen=130 last_return=-146.2 (+1 eps) [worker 1] episodes_seen=130 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=140 last_return=-271.8 (+1 eps) [worker 0] episodes_seen=150 last_return=-348.2 (+1 eps) [worker 1] episodes_seen=140 last_return=-326.8 (+1 eps) [worker 0] episodes_seen=160 last_return=-185.3 (+1 eps) [worker 1] episodes_seen=150 last_return=-178.9 (+1 eps) [worker 0] episodes_seen=170 last_return=-106.8 
(+1 eps) [A2C][sync] it= 1001 steps= 40040 (+ 40) avg10=-158.26 loss=1655.953 pg=-0.000 vf=3311.906 H=0.000 gn=32355.508 [worker 1] episodes_seen=160 last_return=-192.3 (+1 eps) [worker 1] episodes_seen=170 last_return=-140.5 (+1 eps) [worker 0] episodes_seen=180 last_return=-159.8 (+1 eps) [worker 1] episodes_seen=180 last_return=-294.8 (+1 eps) [worker 0] episodes_seen=190 last_return=-157.8 (+1 eps) [worker 1] episodes_seen=190 last_return=-224.9 (+1 eps) [worker 0] episodes_seen=200 last_return=-272.6 (+1 eps) [worker 0] episodes_seen=210 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=200 last_return=-177.8 (+1 eps) [worker 0] episodes_seen=220 last_return=-168.8 (+1 eps) [worker 1] episodes_seen=210 last_return=-234.1 (+1 eps) [worker 0] episodes_seen=230 last_return=-154.1 (+1 eps) [worker 1] episodes_seen=220 last_return=-289.2 (+1 eps) [worker 0] episodes_seen=240 last_return=-243.6 (+1 eps) [worker 1] episodes_seen=230 last_return=-187.6 (+1 eps) [worker 0] episodes_seen=250 last_return=-115.2 (+1 eps) [worker 1] episodes_seen=240 last_return=-236.6 (+1 eps) [worker 0] episodes_seen=260 last_return=-154.1 (+1 eps) [worker 1] episodes_seen=250 last_return=-104.2 (+1 eps) [worker 0] episodes_seen=270 last_return=-216.2 (+1 eps) [worker 1] episodes_seen=260 last_return=-195.5 (+1 eps) [worker 0] episodes_seen=280 last_return=-198.7 (+1 eps) [worker 1] episodes_seen=270 last_return=-130.9 (+1 eps) [worker 0] episodes_seen=290 last_return=-196.5 (+1 eps) [worker 1] episodes_seen=280 last_return=-208.6 (+1 eps) [worker 0] episodes_seen=300 last_return=-153.2 (+1 eps) [worker 1] episodes_seen=290 last_return=-199.1 (+1 eps) [worker 0] episodes_seen=310 last_return=-189.9 (+1 eps) [worker 1] episodes_seen=300 last_return=-147.8 (+1 eps) [worker 0] episodes_seen=320 last_return=-232.8 (+1 eps) [worker 1] episodes_seen=310 last_return=-108.7 (+1 eps) [worker 0] episodes_seen=330 last_return=-167.8 (+1 eps) [A2C][sync] it= 2001 steps= 80040 (+ 40) avg10=-197.14 
loss=19106.516 pg=-0.000 vf=38213.031 H=0.000 gn=176129.562 [worker 1] episodes_seen=320 last_return=-135.1 (+1 eps) [worker 0] episodes_seen=340 last_return=-247.3 (+1 eps) [worker 1] episodes_seen=330 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=350 last_return=-189.4 (+1 eps) [worker 1] episodes_seen=340 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=360 last_return=-140.5 (+1 eps) [worker 1] episodes_seen=350 last_return=-151.8 (+1 eps) [worker 0] episodes_seen=370 last_return=-145.8 (+1 eps) [worker 1] episodes_seen=360 last_return=-262.1 (+1 eps) [worker 0] episodes_seen=380 last_return=-196.6 (+1 eps) [worker 1] episodes_seen=370 last_return=-116.9 (+1 eps) [worker 0] episodes_seen=390 last_return=-246.7 (+1 eps) [worker 1] episodes_seen=380 last_return=-222.1 (+1 eps) [worker 0] episodes_seen=400 last_return=-75.9 (+1 eps) [worker 1] episodes_seen=390 last_return=-213.1 (+1 eps) [worker 0] episodes_seen=410 last_return=-249.3 (+1 eps) [worker 1] episodes_seen=400 last_return=-72.0 (+1 eps) [worker 0] episodes_seen=420 last_return=-54.7 (+1 eps) [worker 1] episodes_seen=410 last_return=-241.2 (+1 eps) [worker 0] episodes_seen=430 last_return=-129.0 (+1 eps) [worker 1] episodes_seen=420 last_return=-180.5 (+1 eps) [worker 0] episodes_seen=440 last_return=-259.2 (+1 eps) [worker 1] episodes_seen=430 last_return=-241.1 (+1 eps) [worker 0] episodes_seen=450 last_return=-147.2 (+1 eps) [worker 1] episodes_seen=440 last_return=-149.3 (+1 eps) [worker 0] episodes_seen=460 last_return=-102.7 (+1 eps) [worker 1] episodes_seen=450 last_return=-86.4 (+1 eps) [worker 0] episodes_seen=470 last_return=-114.5 (+1 eps) [worker 1] episodes_seen=460 last_return=-97.7 (+1 eps) [A2C][sync] it= 3001 steps= 120040 (+ 40) avg10=-100.50 loss=4552234.000 pg=-0.410 vf=9104469.000 H=0.690 gn=3308151.500 [worker 0] episodes_seen=480 last_return=-256.8 (+1 eps) [worker 1] episodes_seen=470 last_return=-246.7 (+1 eps) [worker 0] episodes_seen=490 last_return=-229.4 (+1 eps) 
[worker 1] episodes_seen=480 last_return=-66.6 (+1 eps) [worker 0] episodes_seen=500 last_return=-275.0 (+1 eps) [worker 1] episodes_seen=490 last_return=-216.7 (+1 eps) [worker 0] episodes_seen=510 last_return=-100.6 (+1 eps) [worker 1] episodes_seen=500 last_return=-246.9 (+1 eps) [worker 0] episodes_seen=520 last_return=-210.6 (+1 eps) [worker 1] episodes_seen=510 last_return=-369.2 (+1 eps) [worker 0] episodes_seen=530 last_return=-81.9 (+1 eps) [worker 1] episodes_seen=520 last_return=-74.1 (+1 eps) [worker 0] episodes_seen=540 last_return=-88.8 (+1 eps) [worker 1] episodes_seen=530 last_return=-91.9 (+1 eps) [worker 0] episodes_seen=550 last_return=-72.8 (+1 eps) [worker 1] episodes_seen=540 last_return=-98.5 (+1 eps) [worker 0] episodes_seen=560 last_return=-90.9 (+1 eps) [worker 1] episodes_seen=550 last_return=-69.3 (+1 eps) [worker 0] episodes_seen=570 last_return=-64.9 (+1 eps) [worker 1] episodes_seen=560 last_return=-142.9 (+1 eps) [worker 0] episodes_seen=580 last_return=-161.8 (+1 eps) [worker 1] episodes_seen=570 last_return=-239.8 (+1 eps) [worker 0] episodes_seen=590 last_return=-166.4 (+1 eps) [worker 1] episodes_seen=580 last_return=-255.5 (+1 eps) [worker 0] episodes_seen=600 last_return=-190.1 (+1 eps) [worker 1] episodes_seen=590 last_return=-154.2 (+1 eps) [worker 0] episodes_seen=610 last_return=-290.7 (+1 eps) [worker 1] episodes_seen=600 last_return=-231.9 (+1 eps) [worker 0] episodes_seen=620 last_return=-224.2 (+1 eps) [worker 1] episodes_seen=610 last_return=-139.5 (+1 eps) [worker 0] episodes_seen=630 last_return=123.5 (+1 eps) [A2C][sync] it= 4001 steps= 160040 (+ 40) avg10=-187.51 loss=19396.607 pg=-0.079 vf=38793.438 H=0.637 gn=22415.984 [worker 1] episodes_seen=620 last_return=-266.8 (+1 eps) [worker 0] episodes_seen=640 last_return=-266.7 (+1 eps) [worker 1] episodes_seen=630 last_return=-119.3 (+1 eps) [worker 0] episodes_seen=650 last_return=-258.3 (+1 eps) [worker 1] episodes_seen=640 last_return=-182.3 (+1 eps) [worker 0] 
episodes_seen=660 last_return=-247.7 (+1 eps) [worker 1] episodes_seen=650 last_return=-191.9 (+1 eps) [worker 0] episodes_seen=670 last_return=-59.9 (+1 eps) [worker 1] episodes_seen=660 last_return=-262.2 (+1 eps) [worker 0] episodes_seen=680 last_return=-190.5 (+1 eps) [worker 1] episodes_seen=670 last_return=-151.3 (+1 eps) [worker 0] episodes_seen=690 last_return=-245.1 (+1 eps) [worker 1] episodes_seen=680 last_return=-262.9 (+1 eps) [worker 0] episodes_seen=700 last_return=-157.0 (+1 eps) [worker 1] episodes_seen=690 last_return=-162.9 (+1 eps) [worker 0] episodes_seen=710 last_return=-125.0 (+1 eps) [worker 1] episodes_seen=700 last_return=-205.8 (+1 eps) [worker 0] episodes_seen=720 last_return=-114.8 (+1 eps) [worker 1] episodes_seen=710 last_return=-279.0 (+1 eps) [worker 0] episodes_seen=730 last_return=-25.0 (+1 eps) [worker 1] episodes_seen=720 last_return=-163.0 (+1 eps) [worker 0] episodes_seen=740 last_return=-164.9 (+1 eps) [worker 1] episodes_seen=730 last_return=-127.2 (+1 eps) [worker 0] episodes_seen=750 last_return=-242.6 (+1 eps) [worker 1] episodes_seen=740 last_return=-192.6 (+1 eps) [worker 0] episodes_seen=760 last_return=-140.1 (+1 eps) [worker 1] episodes_seen=750 last_return=-50.7 (+1 eps) [worker 0] episodes_seen=770 last_return=-316.8 (+1 eps) [worker 1] episodes_seen=760 last_return=-200.1 (+1 eps) [worker 0] episodes_seen=780 last_return=-204.1 (+1 eps) [worker 1] episodes_seen=770 last_return=-171.3 (+1 eps) [worker 0] episodes_seen=790 last_return=-49.9 (+1 eps) [worker 1] episodes_seen=780 last_return=-334.0 (+1 eps) [worker 0] episodes_seen=800 last_return=-319.7 (+1 eps) [worker 1] episodes_seen=790 last_return=-128.5 (+1 eps) [worker 0] episodes_seen=810 last_return=-241.5 (+1 eps) [worker 1] episodes_seen=800 last_return=-322.9 (+1 eps) [worker 0] episodes_seen=820 last_return=-156.7 (+1 eps) [worker 1] episodes_seen=810 last_return=-379.8 (+1 eps) [worker 0] episodes_seen=830 last_return=-273.5 (+1 eps) [worker 1] 
episodes_seen=820 last_return=-224.4 (+1 eps) [worker 0] episodes_seen=840 last_return=-223.7 (+1 eps) [worker 1] episodes_seen=830 last_return=-96.5 (+1 eps) [worker 0] episodes_seen=850 last_return=-114.3 (+1 eps) [worker 1] episodes_seen=840 last_return=-234.8 (+1 eps) [worker 0] episodes_seen=860 last_return=-188.1 (+1 eps) [worker 1] episodes_seen=850 last_return=-242.6 (+1 eps) [worker 0] episodes_seen=870 last_return=-299.7 (+1 eps) [worker 1] episodes_seen=860 last_return=-246.7 (+1 eps) [worker 0] episodes_seen=880 last_return=-267.2 (+1 eps) [worker 1] episodes_seen=870 last_return=-252.8 (+1 eps) [worker 0] episodes_seen=890 last_return=-287.9 (+1 eps) [worker 1] episodes_seen=880 last_return=-248.3 (+1 eps) [worker 0] episodes_seen=900 last_return=-369.2 (+1 eps) [worker 1] episodes_seen=890 last_return=-212.8 (+1 eps) [worker 0] episodes_seen=910 last_return=-98.6 (+1 eps) [worker 1] episodes_seen=900 last_return=-248.6 (+1 eps) [A2C][sync] it= 5001 steps= 200040 (+ 40) avg10=-201.58 loss=14199.497 pg=0.000 vf=28398.994 H=0.001 gn=22845.404 [worker 0] episodes_seen=920 last_return=-164.5 (+1 eps) [worker 1] episodes_seen=910 last_return=-171.0 (+1 eps) [worker 0] episodes_seen=930 last_return=-112.6 (+1 eps) [worker 1] episodes_seen=920 last_return=-204.9 (+1 eps) [worker 0] episodes_seen=940 last_return=-130.1 (+1 eps) [worker 1] episodes_seen=930 last_return=-125.1 (+1 eps) [worker 0] episodes_seen=950 last_return=-228.0 (+1 eps) [worker 1] episodes_seen=940 last_return=-278.3 (+1 eps) [worker 0] episodes_seen=960 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=950 last_return=-235.3 (+1 eps) [worker 0] episodes_seen=970 last_return=-146.9 (+1 eps) [worker 1] episodes_seen=960 last_return=-262.5 (+1 eps) [worker 1] episodes_seen=970 last_return=-337.2 (+1 eps) [worker 0] episodes_seen=980 last_return=-270.4 (+1 eps) [worker 1] episodes_seen=980 last_return=-252.3 (+1 eps) [worker 0] episodes_seen=990 last_return=-205.7 (+1 eps) [worker 0] 
episodes_seen=1000 last_return=-8.0 (+1 eps) [worker 1] episodes_seen=990 last_return=-121.1 (+1 eps) [worker 0] episodes_seen=1010 last_return=-215.3 (+1 eps) [worker 1] episodes_seen=1000 last_return=-237.7 (+1 eps) [worker 0] episodes_seen=1020 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1010 last_return=-159.9 (+1 eps) [worker 0] episodes_seen=1030 last_return=-264.4 (+1 eps) [worker 1] episodes_seen=1020 last_return=-195.3 (+1 eps) [worker 0] episodes_seen=1040 last_return=-344.7 (+1 eps) [worker 1] episodes_seen=1030 last_return=-193.6 (+1 eps) [worker 0] episodes_seen=1050 last_return=-110.9 (+1 eps) [worker 1] episodes_seen=1040 last_return=-113.8 (+1 eps) [worker 0] episodes_seen=1060 last_return=-77.9 (+1 eps) [worker 1] episodes_seen=1050 last_return=-108.9 (+1 eps) [worker 0] episodes_seen=1070 last_return=-101.2 (+1 eps) [worker 1] episodes_seen=1060 last_return=-90.9 (+1 eps) [worker 1] episodes_seen=1070 last_return=-80.5 (+1 eps) [worker 0] episodes_seen=1080 last_return=-63.7 (+1 eps) [worker 1] episodes_seen=1080 last_return=-89.2 (+1 eps) [worker 0] episodes_seen=1090 last_return=-80.8 (+1 eps) [worker 1] episodes_seen=1090 last_return=-78.5 (+1 eps) [worker 0] episodes_seen=1100 last_return=-76.3 (+1 eps) [worker 1] episodes_seen=1100 last_return=-90.5 (+1 eps) [worker 0] episodes_seen=1110 last_return=-67.8 (+1 eps) [worker 1] episodes_seen=1110 last_return=-92.8 (+1 eps) [worker 0] episodes_seen=1120 last_return=-127.6 (+1 eps) [worker 1] episodes_seen=1120 last_return=-133.2 (+1 eps) [worker 0] episodes_seen=1130 last_return=-120.5 (+1 eps) [worker 1] episodes_seen=1130 last_return=-111.1 (+1 eps) [worker 0] episodes_seen=1140 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1140 last_return=-110.6 (+1 eps) [worker 0] episodes_seen=1150 last_return=-93.5 (+1 eps) [worker 1] episodes_seen=1150 last_return=-299.7 (+1 eps) [worker 0] episodes_seen=1160 last_return=-311.3 (+1 eps) [A2C][sync] it= 6001 steps= 240040 (+ 40) 
avg10=-233.27 loss=4302.653 pg=-0.000 vf=8605.308 H=0.012 gn=37868.965 [worker 1] episodes_seen=1160 last_return=-218.4 (+1 eps) [worker 0] episodes_seen=1170 last_return=-106.4 (+1 eps) [worker 1] episodes_seen=1170 last_return=-93.8 (+1 eps) [worker 0] episodes_seen=1180 last_return=-54.4 (+1 eps) [worker 1] episodes_seen=1180 last_return=-63.9 (+1 eps) [worker 0] episodes_seen=1190 last_return=-89.3 (+1 eps) [worker 1] episodes_seen=1190 last_return=-65.2 (+1 eps) [worker 0] episodes_seen=1200 last_return=-97.9 (+1 eps) [worker 1] episodes_seen=1200 last_return=-128.3 (+1 eps) [worker 0] episodes_seen=1210 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1210 last_return=-47.0 (+1 eps) [worker 0] episodes_seen=1220 last_return=-77.7 (+1 eps) [worker 1] episodes_seen=1220 last_return=-80.2 (+1 eps) [worker 0] episodes_seen=1230 last_return=-65.3 (+1 eps) [worker 1] episodes_seen=1230 last_return=-129.9 (+1 eps) [worker 0] episodes_seen=1240 last_return=-71.2 (+1 eps) [worker 1] episodes_seen=1240 last_return=-88.8 (+1 eps) [worker 0] episodes_seen=1250 last_return=-105.5 (+1 eps) [worker 1] episodes_seen=1250 last_return=-99.2 (+1 eps) [worker 0] episodes_seen=1260 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1260 last_return=-89.3 (+1 eps) [worker 0] episodes_seen=1270 last_return=-75.8 (+1 eps) [worker 1] episodes_seen=1270 last_return=-99.6 (+1 eps) [worker 0] episodes_seen=1280 last_return=-115.5 (+1 eps) [worker 1] episodes_seen=1280 last_return=-151.7 (+1 eps) [worker 0] episodes_seen=1290 last_return=-83.0 (+1 eps) [worker 1] episodes_seen=1290 last_return=-68.9 (+1 eps) [worker 0] episodes_seen=1300 last_return=-95.9 (+1 eps) [worker 1] episodes_seen=1300 last_return=-107.2 (+1 eps) [worker 0] episodes_seen=1310 last_return=-104.7 (+1 eps) [worker 1] episodes_seen=1310 last_return=-144.7 (+1 eps) [worker 0] episodes_seen=1320 last_return=-55.8 (+1 eps) [worker 1] episodes_seen=1320 last_return=-101.3 (+1 eps) [worker 0] episodes_seen=1330 
last_return=-66.8 (+1 eps) [worker 1] episodes_seen=1330 last_return=-93.2 (+1 eps) [worker 0] episodes_seen=1340 last_return=-76.6 (+1 eps) [worker 1] episodes_seen=1340 last_return=-112.0 (+1 eps) [worker 0] episodes_seen=1350 last_return=-57.2 (+1 eps) [worker 1] episodes_seen=1350 last_return=-98.3 (+1 eps) [worker 0] episodes_seen=1360 last_return=-97.1 (+1 eps) [worker 1] episodes_seen=1360 last_return=-89.3 (+1 eps) [worker 0] episodes_seen=1370 last_return=-343.1 (+1 eps) [worker 1] episodes_seen=1370 last_return=-126.6 (+1 eps) [worker 0] episodes_seen=1380 last_return=-78.9 (+1 eps) [worker 1] episodes_seen=1380 last_return=-70.7 (+1 eps) [A2C][sync] it= 7001 steps= 280040 (+ 40) avg10= -99.17 loss=263.712 pg=-0.019 vf=527.506 H=0.434 gn=2613.221 [worker 0] episodes_seen=1390 last_return=-60.5 (+1 eps) [worker 0] episodes_seen=1400 last_return=-66.4 (+1 eps) [worker 1] episodes_seen=1390 last_return=-176.8 (+1 eps) [worker 0] episodes_seen=1410 last_return=-186.4 (+1 eps) [worker 1] episodes_seen=1400 last_return=-121.7 (+1 eps) [worker 0] episodes_seen=1420 last_return=-273.0 (+1 eps) [worker 1] episodes_seen=1410 last_return=-220.0 (+1 eps) [worker 0] episodes_seen=1430 last_return=-105.3 (+1 eps) [worker 1] episodes_seen=1420 last_return=-108.6 (+1 eps) [worker 0] episodes_seen=1440 last_return=-104.0 (+1 eps) [worker 1] episodes_seen=1430 last_return=-187.7 (+1 eps) [worker 0] episodes_seen=1450 last_return=-90.9 (+1 eps) [worker 1] episodes_seen=1440 last_return=-114.5 (+1 eps) [worker 0] episodes_seen=1460 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1450 last_return=-102.9 (+1 eps) [worker 0] episodes_seen=1470 last_return=-42.6 (+1 eps) [worker 1] episodes_seen=1460 last_return=-27.4 (+1 eps) [worker 0] episodes_seen=1480 last_return=-89.4 (+1 eps) [worker 1] episodes_seen=1470 last_return=-109.1 (+1 eps) [worker 0] episodes_seen=1490 last_return=-99.5 (+1 eps) [worker 1] episodes_seen=1480 last_return=-96.8 (+1 eps) [worker 0] 
episodes_seen=1500 last_return=-64.3 (+1 eps) [worker 1] episodes_seen=1490 last_return=-68.1 (+1 eps) [worker 0] episodes_seen=1510 last_return=-83.7 (+1 eps) [worker 0] episodes_seen=1520 last_return=-108.4 (+1 eps) [worker 1] episodes_seen=1500 last_return=-104.6 (+1 eps) [A2C][sync] it= 8001 steps= 320040 (+ 40) avg10= -32.69 loss=67.330 pg=0.120 vf=134.456 H=0.371 gn=1227.193 [worker 0] episodes_seen=1530 last_return=-105.4 (+1 eps) [worker 1] episodes_seen=1510 last_return=-16.5 (+1 eps) [worker 0] episodes_seen=1540 last_return=-84.0 (+1 eps) [worker 1] episodes_seen=1520 last_return=-79.7 (+1 eps) [worker 0] episodes_seen=1550 last_return=-105.4 (+1 eps) [worker 1] episodes_seen=1530 last_return=-88.0 (+1 eps) [worker 0] episodes_seen=1560 last_return=99.5 (+1 eps) [worker 1] episodes_seen=1540 last_return=-129.6 (+1 eps) [worker 0] episodes_seen=1570 last_return=1.1 (+1 eps) [worker 1] episodes_seen=1550 last_return=-73.4 (+1 eps) [worker 0] episodes_seen=1580 last_return=-99.0 (+1 eps) [worker 1] episodes_seen=1560 last_return=-78.2 (+1 eps) [worker 0] episodes_seen=1590 last_return=-148.9 (+1 eps) [worker 1] episodes_seen=1570 last_return=-231.6 (+1 eps) [worker 0] episodes_seen=1600 last_return=99.5 (+1 eps) [worker 1] episodes_seen=1580 last_return=-87.7 (+1 eps) [A2C][sync] it= 9001 steps= 360040 (+ 40) avg10= -43.13 loss=11.831 pg=-0.276 vf=24.278 H=0.626 gn=83.310 [worker 0] episodes_seen=1610 last_return=-74.1 (+1 eps) [worker 1] episodes_seen=1590 last_return=-85.8 (+1 eps) [worker 0] episodes_seen=1620 last_return=-86.9 (+1 eps) [worker 1] episodes_seen=1600 last_return=-22.6 (+1 eps) [worker 0] episodes_seen=1630 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1610 last_return=-50.4 (+1 eps) [worker 0] episodes_seen=1640 last_return=-85.0 (+1 eps) [worker 1] episodes_seen=1620 last_return=-89.8 (+1 eps) [worker 0] episodes_seen=1650 last_return=-73.4 (+1 eps) [worker 1] episodes_seen=1630 last_return=-106.2 (+1 eps) [worker 1] 
episodes_seen=1640 last_return=-48.9 (+1 eps) [worker 0] episodes_seen=1660 last_return=-91.2 (+1 eps) [worker 0] episodes_seen=1670 last_return=-71.7 (+1 eps) [worker 1] episodes_seen=1650 last_return=-87.9 (+1 eps) [worker 1] episodes_seen=1660 last_return=99.7 (+1 eps) [worker 0] episodes_seen=1680 last_return=99.9 (+1 eps) [worker 0] episodes_seen=1690 last_return=12.1 (+1 eps) [worker 1] episodes_seen=1670 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1680 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1700 last_return=99.7 (+1 eps) [worker 0] episodes_seen=1710 last_return=-90.8 (+1 eps) [worker 1] episodes_seen=1690 last_return=-63.3 (+1 eps) [A2C][sync] it=10000 steps= 400000 (+ 40) avg10= -83.97 loss=4.497 pg=-0.290 vf=9.613 H=0.388 gn=49.843 [Checkpoint] Saved self-describing checkpoint → part2_artifacts/checkpoints/a2c_run3_seed1227.pth [A2C][sync] done: steps=400000 time=412.2s avg10=-83.97
[Run run3_seed1227] checkpoint: part2_artifacts/checkpoints/a2c_run3_seed1227.pth [Run run3_seed1227] training plot (tail 500): part2_artifacts/train_curve_run3_seed1227.png [Run run3_seed1227] training plot (full): part2_artifacts/train_curve_full_run3_seed1227.png [Run run3_seed1227] per-worker plot: part2_artifacts/train_curve_workers_run3_seed1227.png [Run run3_seed1227] workers-average plot: part2_artifacts/train_curve_workers_avg_run3_seed1227.png
[Eval run3_seed1227] mean=-29.18 std=42.17 min=-135.39 max=21.05 [Eval run3_seed1227] CSV: part2_artifacts/eval10_run3_seed1227.csv [Eval run3_seed1227] plot: part2_artifacts/eval10_run3_seed1227.png [Best] ep=3 return=21.05 seed=1230
/usr/local/lib/python3.12/dist-packages/gymnasium/wrappers/rendering.py:293: UserWarning: WARN: Overwriting existing videos at /content/part2_artifacts/videos/run3_seed1227 folder (try specifying a different `video_folder` for the `RecordVideo` wrapper if this is not desired)
logger.warn(
[Video run3_seed1227] episode return=21.05 [Video run3_seed1227] saved under: part2_artifacts/videos run3_seed1227 | mean=-29.2±42.2 | best_ep=3, best_ret=21.0
Run#4
# Run 4: identifier used to tag this run's checkpoint, plots, CSV and video.
run_id = f"run4_seed{SEED}"

# Hyperparameters for this run, collected in one place so the configuration
# can be read at a glance; they are forwarded unchanged to train_once.
hparams_run4 = {
    "n_workers": 2,
    "total_env_steps": 500_000,
    "T": 30,
    "gamma": 0.99,
    "entropy_coef": 0.03,
    "value_coef": 0.60,
    "max_grad_norm": 0.5,
    "lr": 5e-4,
    "log_every": 50_000,
}

# Multi-worker training; the returned `paths` carries the checkpoint location
# (paths.ckpt_path) consumed by the evaluation and recording steps below.
model, logs, paths = train_once(run_id=run_id, **hparams_run4)

# Greedy evaluation over 10 fixed-seed episodes, loaded from the saved checkpoint.
metrics, eval_paths = evaluate_10(run_id, paths.ckpt_path)

# Re-run and record the best of those evaluation episodes, using its seed.
video_dir, best_idx, best_ret = record_best_from_eval(run_id, paths.ckpt_path)

# One-line summary of the run: mean ± std of eval returns and the best episode.
print(f"{run_id} | mean={metrics['mean']:.1f}±{metrics['std']:.1f} | best_ep={best_idx}, best_ret={best_ret:.1f}")
[Run run4_seed1227] starting training… [A2C][sync] start: workers=2, T=30, target_steps=500000, mp=fork [A2C][sync] it= 1 steps= 60 (+ 60) avg10= nan loss=660.533 pg=0.000 vf=1100.957 H=1.386 gn=80.146 [worker 0] episodes_seen=10 last_return=-141.7 (+1 eps) [worker 1] episodes_seen=10 last_return=-86.6 (+1 eps) [worker 1] episodes_seen=20 last_return=-111.7 (+1 eps) [worker 0] episodes_seen=20 last_return=-103.4 (+1 eps) [worker 1] episodes_seen=30 last_return=-135.1 (+1 eps) [worker 0] episodes_seen=30 last_return=-166.5 (+1 eps) [worker 0] episodes_seen=40 last_return=-155.5 (+1 eps) [worker 1] episodes_seen=40 last_return=-114.5 (+1 eps) [worker 0] episodes_seen=50 last_return=-145.8 (+1 eps) [worker 1] episodes_seen=50 last_return=-96.9 (+1 eps) [worker 0] episodes_seen=60 last_return=-165.0 (+1 eps) [worker 1] episodes_seen=60 last_return=-121.5 (+1 eps) [worker 0] episodes_seen=70 last_return=-120.4 (+1 eps) [worker 1] episodes_seen=70 last_return=-350.1 (+1 eps) [worker 0] episodes_seen=80 last_return=17.2 (+1 eps) [worker 1] episodes_seen=80 last_return=-140.5 (+1 eps) [worker 0] episodes_seen=90 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=90 last_return=-89.0 (+1 eps) [worker 0] episodes_seen=100 last_return=-206.7 (+1 eps) [worker 1] episodes_seen=100 last_return=-175.0 (+1 eps) [worker 0] episodes_seen=110 last_return=-140.8 (+1 eps) [worker 1] episodes_seen=110 last_return=-129.0 (+1 eps) [worker 0] episodes_seen=120 last_return=-59.3 (+1 eps) [worker 1] episodes_seen=120 last_return=-223.5 (+1 eps) [worker 0] episodes_seen=130 last_return=-110.5 (+1 eps) [worker 1] episodes_seen=130 last_return=-102.4 (+1 eps) [worker 0] episodes_seen=140 last_return=-181.3 (+1 eps) [worker 1] episodes_seen=140 last_return=-135.4 (+1 eps) [worker 0] episodes_seen=150 last_return=-123.3 (+1 eps) [worker 1] episodes_seen=150 last_return=-285.3 (+1 eps) [worker 0] episodes_seen=160 last_return=-171.1 (+1 eps) [worker 1] episodes_seen=160 last_return=-43.4 (+1 
eps) [worker 0] episodes_seen=170 last_return=-352.7 (+1 eps) [worker 1] episodes_seen=170 last_return=-381.6 (+1 eps) [worker 0] episodes_seen=180 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=180 last_return=-337.6 (+1 eps) [worker 0] episodes_seen=190 last_return=-208.6 (+1 eps) [worker 1] episodes_seen=190 last_return=-183.3 (+1 eps) [worker 0] episodes_seen=200 last_return=-305.1 (+1 eps) [worker 1] episodes_seen=200 last_return=-150.2 (+1 eps) [worker 0] episodes_seen=210 last_return=-421.7 (+1 eps) [worker 1] episodes_seen=210 last_return=-426.4 (+1 eps) [worker 0] episodes_seen=220 last_return=-250.3 (+1 eps) [worker 1] episodes_seen=220 last_return=-89.0 (+1 eps) [worker 0] episodes_seen=230 last_return=-80.5 (+1 eps) [worker 1] episodes_seen=230 last_return=-85.4 (+1 eps) [worker 0] episodes_seen=240 last_return=-138.4 (+1 eps) [worker 1] episodes_seen=240 last_return=-188.8 (+1 eps) [worker 0] episodes_seen=250 last_return=-173.1 (+1 eps) [worker 1] episodes_seen=250 last_return=-332.0 (+1 eps) [worker 0] episodes_seen=260 last_return=-201.9 (+1 eps) [worker 1] episodes_seen=260 last_return=-337.9 (+1 eps) [worker 0] episodes_seen=270 last_return=-212.4 (+1 eps) [worker 1] episodes_seen=270 last_return=-82.6 (+1 eps) [worker 0] episodes_seen=280 last_return=-267.4 (+1 eps) [worker 1] episodes_seen=280 last_return=-434.1 (+1 eps) [worker 0] episodes_seen=290 last_return=-324.3 (+1 eps) [worker 1] episodes_seen=290 last_return=-109.8 (+1 eps) [worker 0] episodes_seen=300 last_return=-105.9 (+1 eps) [A2C][sync] it= 835 steps= 50100 (+ 60) avg10= -97.09 loss=723.424 pg=-0.208 vf=1206.073 H=0.394 gn=3044.408 [worker 1] episodes_seen=300 last_return=-64.7 (+1 eps) [worker 0] episodes_seen=310 last_return=-88.2 (+1 eps) [worker 1] episodes_seen=310 last_return=-298.5 (+1 eps) [worker 0] episodes_seen=320 last_return=-232.7 (+1 eps) [worker 1] episodes_seen=320 last_return=-324.9 (+1 eps) [worker 0] episodes_seen=330 last_return=-246.7 (+1 eps) [worker 1] 
episodes_seen=330 last_return=-351.4 (+1 eps) [worker 0] episodes_seen=340 last_return=-122.9 (+1 eps) [worker 1] episodes_seen=340 last_return=-139.6 (+1 eps) [worker 0] episodes_seen=350 last_return=-324.4 (+1 eps) [worker 1] episodes_seen=350 last_return=-132.9 (+1 eps) [worker 0] episodes_seen=360 last_return=-101.2 (+1 eps) [worker 1] episodes_seen=360 last_return=-97.4 (+1 eps) [worker 0] episodes_seen=370 last_return=-97.9 (+1 eps) [worker 1] episodes_seen=370 last_return=-70.8 (+1 eps) [worker 0] episodes_seen=380 last_return=36.1 (+1 eps) [worker 1] episodes_seen=380 last_return=-64.8 (+1 eps) [worker 0] episodes_seen=390 last_return=-65.1 (+1 eps) [worker 1] episodes_seen=390 last_return=-84.8 (+1 eps) [worker 0] episodes_seen=400 last_return=-76.3 (+1 eps) [worker 1] episodes_seen=400 last_return=-243.0 (+1 eps) [worker 0] episodes_seen=410 last_return=-143.0 (+1 eps) [worker 1] episodes_seen=410 last_return=-179.6 (+1 eps) [worker 0] episodes_seen=420 last_return=-243.3 (+1 eps) [worker 1] episodes_seen=420 last_return=-245.8 (+1 eps) [worker 0] episodes_seen=430 last_return=-380.9 (+1 eps) [worker 1] episodes_seen=430 last_return=-299.0 (+1 eps) [worker 0] episodes_seen=440 last_return=-464.5 (+1 eps) [worker 1] episodes_seen=440 last_return=-458.4 (+1 eps) [worker 0] episodes_seen=450 last_return=-137.3 (+1 eps) [worker 1] episodes_seen=450 last_return=-198.2 (+1 eps) [worker 0] episodes_seen=460 last_return=-381.0 (+1 eps) [worker 1] episodes_seen=460 last_return=-178.3 (+1 eps) [worker 0] episodes_seen=470 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=470 last_return=-337.7 (+1 eps) [worker 0] episodes_seen=480 last_return=-196.3 (+1 eps) [worker 1] episodes_seen=480 last_return=-356.8 (+1 eps) [worker 0] episodes_seen=490 last_return=-349.4 (+1 eps) [worker 1] episodes_seen=490 last_return=-122.1 (+1 eps) [worker 0] episodes_seen=500 last_return=-162.6 (+1 eps) [worker 1] episodes_seen=500 last_return=-287.8 (+1 eps) [worker 0] 
episodes_seen=510 last_return=-306.3 (+1 eps) [worker 1] episodes_seen=510 last_return=-206.8 (+1 eps) [worker 0] episodes_seen=520 last_return=-30.7 (+1 eps) [worker 1] episodes_seen=520 last_return=-214.2 (+1 eps) [worker 0] episodes_seen=530 last_return=-180.5 (+1 eps) [worker 1] episodes_seen=530 last_return=-141.3 (+1 eps) [worker 0] episodes_seen=540 last_return=-109.0 (+1 eps) [worker 1] episodes_seen=540 last_return=-251.3 (+1 eps) [worker 0] episodes_seen=550 last_return=-171.3 (+1 eps) [worker 1] episodes_seen=550 last_return=-141.8 (+1 eps) [worker 0] episodes_seen=560 last_return=-377.2 (+1 eps) [worker 1] episodes_seen=560 last_return=-238.9 (+1 eps) [worker 0] episodes_seen=570 last_return=-522.4 (+1 eps) [worker 1] episodes_seen=570 last_return=-255.2 (+1 eps) [worker 0] episodes_seen=580 last_return=-256.2 (+1 eps) [worker 1] episodes_seen=580 last_return=-504.2 (+1 eps) [worker 0] episodes_seen=590 last_return=-240.7 (+1 eps) [worker 1] episodes_seen=590 last_return=-346.0 (+1 eps) [worker 0] episodes_seen=600 last_return=-136.9 (+1 eps) [worker 1] episodes_seen=600 last_return=-161.6 (+1 eps) [worker 0] episodes_seen=610 last_return=-274.4 (+1 eps) [worker 1] episodes_seen=610 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=620 last_return=-318.0 (+1 eps) [worker 1] episodes_seen=620 last_return=-255.0 (+1 eps) [worker 0] episodes_seen=630 last_return=-250.2 (+1 eps) [worker 1] episodes_seen=630 last_return=-75.0 (+1 eps) [A2C][sync] it= 1669 steps= 100140 (+ 60) avg10= -94.32 loss=1531.580 pg=-0.049 vf=2552.722 H=0.130 gn=5519.020 [worker 0] episodes_seen=640 last_return=-120.8 (+1 eps) [worker 1] episodes_seen=640 last_return=-103.2 (+1 eps) [worker 0] episodes_seen=650 last_return=-97.8 (+1 eps) [worker 0] episodes_seen=660 last_return=-92.6 (+1 eps) [worker 1] episodes_seen=650 last_return=-88.3 (+1 eps) [worker 0] episodes_seen=670 last_return=-207.2 (+1 eps) [worker 1] episodes_seen=660 last_return=-101.9 (+1 eps) [worker 0] 
episodes_seen=680 last_return=-115.1 (+1 eps) [worker 1] episodes_seen=670 last_return=-103.4 (+1 eps) [worker 0] episodes_seen=690 last_return=-93.2 (+1 eps) [worker 1] episodes_seen=680 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=690 last_return=-173.5 (+1 eps) [worker 0] episodes_seen=700 last_return=-126.0 (+1 eps) [worker 1] episodes_seen=700 last_return=-273.9 (+1 eps) [worker 0] episodes_seen=710 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=710 last_return=-247.7 (+1 eps) [worker 0] episodes_seen=720 last_return=-216.2 (+1 eps) [worker 1] episodes_seen=720 last_return=-102.2 (+1 eps) [worker 0] episodes_seen=730 last_return=-116.4 (+1 eps) [worker 0] episodes_seen=740 last_return=-117.4 (+1 eps) [worker 1] episodes_seen=730 last_return=-35.4 (+1 eps) [worker 0] episodes_seen=750 last_return=-233.0 (+1 eps) [worker 1] episodes_seen=740 last_return=-109.0 (+1 eps) [worker 1] episodes_seen=750 last_return=-195.0 (+1 eps) [worker 0] episodes_seen=760 last_return=-135.3 (+1 eps) [worker 0] episodes_seen=770 last_return=-100.3 (+1 eps) [worker 1] episodes_seen=760 last_return=-169.4 (+1 eps) [worker 0] episodes_seen=780 last_return=-91.9 (+1 eps) [worker 1] episodes_seen=770 last_return=-94.3 (+1 eps) [worker 0] episodes_seen=790 last_return=-129.0 (+1 eps) [worker 1] episodes_seen=780 last_return=-184.3 (+1 eps) [worker 0] episodes_seen=800 last_return=-148.9 (+1 eps) [worker 1] episodes_seen=790 last_return=-176.3 (+1 eps) [worker 0] episodes_seen=810 last_return=-383.5 (+1 eps) [worker 1] episodes_seen=800 last_return=-119.3 (+1 eps) [worker 0] episodes_seen=820 last_return=-140.9 (+1 eps) [worker 1] episodes_seen=810 last_return=-115.5 (+1 eps) [worker 0] episodes_seen=830 last_return=-141.8 (+1 eps) [worker 1] episodes_seen=820 last_return=-107.2 (+1 eps) [worker 0] episodes_seen=840 last_return=-91.1 (+1 eps) [worker 1] episodes_seen=830 last_return=-97.0 (+1 eps) [worker 0] episodes_seen=850 last_return=-70.2 (+1 eps) [worker 1] 
episodes_seen=840 last_return=-45.2 (+1 eps) [worker 0] episodes_seen=860 last_return=-78.6 (+1 eps) [worker 1] episodes_seen=850 last_return=-101.6 (+1 eps) [worker 0] episodes_seen=870 last_return=-103.6 (+1 eps) [worker 1] episodes_seen=860 last_return=-92.8 (+1 eps) [worker 0] episodes_seen=880 last_return=-264.7 (+1 eps) [worker 1] episodes_seen=870 last_return=-171.0 (+1 eps) [worker 0] episodes_seen=890 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=880 last_return=-338.7 (+1 eps) [worker 0] episodes_seen=900 last_return=-122.6 (+1 eps) [worker 1] episodes_seen=890 last_return=-183.4 (+1 eps) [worker 0] episodes_seen=910 last_return=-194.1 (+1 eps) [worker 1] episodes_seen=900 last_return=-425.5 (+1 eps) [worker 0] episodes_seen=920 last_return=-376.9 (+1 eps) [worker 1] episodes_seen=910 last_return=-189.9 (+1 eps) [worker 0] episodes_seen=930 last_return=-166.9 (+1 eps) [worker 1] episodes_seen=920 last_return=-161.0 (+1 eps) [worker 0] episodes_seen=940 last_return=-158.4 (+1 eps) [worker 1] episodes_seen=930 last_return=-259.7 (+1 eps) [worker 0] episodes_seen=950 last_return=-191.6 (+1 eps) [A2C][sync] it= 2503 steps= 150180 (+ 60) avg10=-223.15 loss=7917.688 pg=-0.001 vf=13196.147 H=0.034 gn=41119.555 [worker 1] episodes_seen=940 last_return=-207.5 (+1 eps) [worker 0] episodes_seen=960 last_return=-307.8 (+1 eps) [worker 1] episodes_seen=950 last_return=-175.5 (+1 eps) [worker 0] episodes_seen=970 last_return=-295.4 (+1 eps) [worker 1] episodes_seen=960 last_return=-71.4 (+1 eps) [worker 0] episodes_seen=980 last_return=-93.5 (+1 eps) [worker 1] episodes_seen=970 last_return=-87.0 (+1 eps) [worker 0] episodes_seen=990 last_return=-83.6 (+1 eps) [worker 1] episodes_seen=980 last_return=-86.1 (+1 eps) [worker 0] episodes_seen=1000 last_return=-51.6 (+1 eps) [worker 1] episodes_seen=990 last_return=-118.8 (+1 eps) [worker 0] episodes_seen=1010 last_return=-63.8 (+1 eps) [worker 1] episodes_seen=1000 last_return=-109.8 (+1 eps) [worker 0] 
episodes_seen=1020 last_return=-68.3 (+1 eps) [worker 1] episodes_seen=1010 last_return=-57.7 (+1 eps) [worker 0] episodes_seen=1030 last_return=-113.9 (+1 eps) [worker 1] episodes_seen=1020 last_return=-200.4 (+1 eps) [worker 0] episodes_seen=1040 last_return=-205.5 (+1 eps) [worker 1] episodes_seen=1030 last_return=-255.7 (+1 eps) [worker 0] episodes_seen=1050 last_return=-294.2 (+1 eps) [worker 1] episodes_seen=1040 last_return=-143.5 (+1 eps) [worker 0] episodes_seen=1060 last_return=-232.1 (+1 eps) [worker 1] episodes_seen=1050 last_return=-119.3 (+1 eps) [worker 0] episodes_seen=1070 last_return=-87.2 (+1 eps) [worker 1] episodes_seen=1060 last_return=-226.4 (+1 eps) [worker 0] episodes_seen=1080 last_return=-115.9 (+1 eps) [worker 1] episodes_seen=1070 last_return=-99.7 (+1 eps) [worker 0] episodes_seen=1090 last_return=-113.1 (+1 eps) [worker 0] episodes_seen=1100 last_return=-286.3 (+1 eps) [worker 1] episodes_seen=1080 last_return=-238.3 (+1 eps) [worker 1] episodes_seen=1090 last_return=-88.4 (+1 eps) [worker 0] episodes_seen=1110 last_return=-95.7 (+1 eps) [worker 1] episodes_seen=1100 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1120 last_return=-76.1 (+1 eps) [worker 1] episodes_seen=1110 last_return=-205.9 (+1 eps) [worker 0] episodes_seen=1130 last_return=-108.9 (+1 eps) [worker 0] episodes_seen=1140 last_return=-95.1 (+1 eps) [worker 1] episodes_seen=1120 last_return=-302.8 (+1 eps) [worker 0] episodes_seen=1150 last_return=-79.6 (+1 eps) [worker 1] episodes_seen=1130 last_return=-117.1 (+1 eps) [worker 0] episodes_seen=1160 last_return=-99.8 (+1 eps) [worker 1] episodes_seen=1140 last_return=70.3 (+1 eps) [worker 0] episodes_seen=1170 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1150 last_return=-170.6 (+1 eps) [worker 0] episodes_seen=1180 last_return=-271.4 (+1 eps) [worker 1] episodes_seen=1160 last_return=-238.3 (+1 eps) [worker 0] episodes_seen=1190 last_return=-208.6 (+1 eps) [worker 1] episodes_seen=1170 last_return=-180.1 
(+1 eps) [worker 0] episodes_seen=1200 last_return=-69.3 (+1 eps) [worker 1] episodes_seen=1180 last_return=-78.8 (+1 eps) [worker 0] episodes_seen=1210 last_return=-89.9 (+1 eps) [worker 1] episodes_seen=1190 last_return=-83.3 (+1 eps) [worker 0] episodes_seen=1220 last_return=-105.9 (+1 eps) [worker 1] episodes_seen=1200 last_return=-180.6 (+1 eps) [worker 0] episodes_seen=1230 last_return=-225.6 (+1 eps) [worker 1] episodes_seen=1210 last_return=-270.4 (+1 eps) [worker 0] episodes_seen=1240 last_return=-101.8 (+1 eps) [worker 1] episodes_seen=1220 last_return=-180.1 (+1 eps) [worker 0] episodes_seen=1250 last_return=-260.9 (+1 eps) [worker 1] episodes_seen=1230 last_return=-154.7 (+1 eps) [A2C][sync] it= 3337 steps= 200220 (+ 60) avg10=-165.77 loss=296.968 pg=0.096 vf=494.820 H=0.657 gn=1575.387 [worker 0] episodes_seen=1260 last_return=-97.1 (+1 eps) [worker 1] episodes_seen=1240 last_return=-19.7 (+1 eps) [worker 1] episodes_seen=1250 last_return=-229.0 (+1 eps) [worker 0] episodes_seen=1270 last_return=-184.1 (+1 eps) [worker 1] episodes_seen=1260 last_return=-82.7 (+1 eps) [worker 0] episodes_seen=1280 last_return=-102.9 (+1 eps) [worker 0] episodes_seen=1290 last_return=-125.0 (+1 eps) [worker 1] episodes_seen=1270 last_return=-142.2 (+1 eps) [worker 0] episodes_seen=1300 last_return=-137.0 (+1 eps) [worker 1] episodes_seen=1280 last_return=-119.2 (+1 eps) [worker 1] episodes_seen=1290 last_return=-121.9 (+1 eps) [worker 0] episodes_seen=1310 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1300 last_return=-101.8 (+1 eps) [worker 0] episodes_seen=1320 last_return=-112.5 (+1 eps) [worker 1] episodes_seen=1310 last_return=-87.8 (+1 eps) [worker 0] episodes_seen=1330 last_return=-141.5 (+1 eps) [worker 1] episodes_seen=1320 last_return=-149.8 (+1 eps) [worker 0] episodes_seen=1340 last_return=-24.3 (+1 eps) [worker 1] episodes_seen=1330 last_return=-86.5 (+1 eps) [worker 0] episodes_seen=1350 last_return=-106.7 (+1 eps) [worker 1] episodes_seen=1340 
last_return=-141.2 (+1 eps) [worker 0] episodes_seen=1360 last_return=-92.9 (+1 eps) [worker 1] episodes_seen=1350 last_return=-78.7 (+1 eps) [worker 0] episodes_seen=1370 last_return=-101.8 (+1 eps) [worker 1] episodes_seen=1360 last_return=-259.1 (+1 eps) [worker 0] episodes_seen=1380 last_return=-102.1 (+1 eps) [worker 1] episodes_seen=1370 last_return=-195.4 (+1 eps) [worker 0] episodes_seen=1390 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1380 last_return=-130.8 (+1 eps) [worker 0] episodes_seen=1400 last_return=-104.5 (+1 eps) [worker 1] episodes_seen=1390 last_return=-311.3 (+1 eps) [worker 0] episodes_seen=1410 last_return=-160.1 (+1 eps) [worker 1] episodes_seen=1400 last_return=-107.8 (+1 eps) [worker 0] episodes_seen=1420 last_return=-64.1 (+1 eps) [worker 1] episodes_seen=1410 last_return=-68.8 (+1 eps) [worker 0] episodes_seen=1430 last_return=-128.3 (+1 eps) [worker 1] episodes_seen=1420 last_return=-116.4 (+1 eps) [worker 0] episodes_seen=1440 last_return=-103.3 (+1 eps) [worker 1] episodes_seen=1430 last_return=-129.0 (+1 eps) [worker 0] episodes_seen=1450 last_return=-237.9 (+1 eps) [worker 1] episodes_seen=1440 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1460 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1450 last_return=-124.2 (+1 eps) [worker 0] episodes_seen=1470 last_return=-130.1 (+1 eps) [worker 1] episodes_seen=1460 last_return=-91.8 (+1 eps) [worker 0] episodes_seen=1480 last_return=-116.5 (+1 eps) [A2C][sync] it= 4171 steps= 250260 (+ 60) avg10=-121.85 loss=101.607 pg=0.038 vf=169.311 H=0.597 gn=842.045 [worker 1] episodes_seen=1470 last_return=-153.8 (+1 eps) [worker 0] episodes_seen=1490 last_return=-132.2 (+1 eps) [worker 1] episodes_seen=1480 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1500 last_return=-260.6 (+1 eps) [worker 0] episodes_seen=1510 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1490 last_return=-114.2 (+1 eps) [worker 0] episodes_seen=1520 last_return=-91.9 (+1 eps) [worker 1] 
episodes_seen=1500 last_return=-84.4 (+1 eps) [worker 0] episodes_seen=1530 last_return=-88.1 (+1 eps) [worker 1] episodes_seen=1510 last_return=-110.7 (+1 eps) [worker 0] episodes_seen=1540 last_return=-123.2 (+1 eps) [worker 1] episodes_seen=1520 last_return=-412.6 (+1 eps) [worker 0] episodes_seen=1550 last_return=-84.7 (+1 eps) [worker 1] episodes_seen=1530 last_return=-81.5 (+1 eps) [worker 1] episodes_seen=1540 last_return=-100.2 (+1 eps) [worker 0] episodes_seen=1560 last_return=-101.8 (+1 eps) [worker 1] episodes_seen=1550 last_return=-62.9 (+1 eps) [worker 0] episodes_seen=1570 last_return=-132.0 (+1 eps) [worker 1] episodes_seen=1560 last_return=-107.4 (+1 eps) [worker 0] episodes_seen=1580 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1570 last_return=-105.0 (+1 eps) [worker 0] episodes_seen=1590 last_return=-136.3 (+1 eps) [worker 1] episodes_seen=1580 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1600 last_return=-147.7 (+1 eps) [worker 1] episodes_seen=1590 last_return=-111.2 (+1 eps) [worker 0] episodes_seen=1610 last_return=-229.5 (+1 eps) [worker 1] episodes_seen=1600 last_return=-177.5 (+1 eps) [worker 0] episodes_seen=1620 last_return=-72.6 (+1 eps) [worker 1] episodes_seen=1610 last_return=-189.8 (+1 eps) [worker 0] episodes_seen=1630 last_return=-128.6 (+1 eps) [worker 1] episodes_seen=1620 last_return=-125.9 (+1 eps) [worker 0] episodes_seen=1640 last_return=-89.7 (+1 eps) [worker 1] episodes_seen=1630 last_return=-111.8 (+1 eps) [worker 0] episodes_seen=1650 last_return=-22.7 (+1 eps) [worker 1] episodes_seen=1640 last_return=-95.6 (+1 eps) [worker 0] episodes_seen=1660 last_return=-126.6 (+1 eps) [worker 1] episodes_seen=1650 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1670 last_return=-126.7 (+1 eps) [worker 0] episodes_seen=1680 last_return=-106.2 (+1 eps) [worker 1] episodes_seen=1660 last_return=42.7 (+1 eps) [worker 0] episodes_seen=1690 last_return=-45.7 (+1 eps) [worker 1] episodes_seen=1670 last_return=-77.8 
(+1 eps) [A2C][sync] it= 5005 steps= 300300 (+ 60) avg10= -85.08 loss=20.553 pg=0.005 vf=34.274 H=0.547 gn=553.650 [worker 0] episodes_seen=1700 last_return=-123.2 (+1 eps) [worker 1] episodes_seen=1680 last_return=-85.1 (+1 eps) [worker 0] episodes_seen=1710 last_return=-104.5 (+1 eps) [worker 1] episodes_seen=1690 last_return=-105.1 (+1 eps) [worker 0] episodes_seen=1720 last_return=-246.7 (+1 eps) [worker 1] episodes_seen=1700 last_return=-102.6 (+1 eps) [worker 0] episodes_seen=1730 last_return=-108.9 (+1 eps) [worker 1] episodes_seen=1710 last_return=-146.4 (+1 eps) [worker 0] episodes_seen=1740 last_return=-132.6 (+1 eps) [worker 1] episodes_seen=1720 last_return=-113.1 (+1 eps) [worker 0] episodes_seen=1750 last_return=-130.4 (+1 eps) [worker 1] episodes_seen=1730 last_return=-111.1 (+1 eps) [worker 0] episodes_seen=1760 last_return=-111.6 (+1 eps) [worker 0] episodes_seen=1770 last_return=-111.4 (+1 eps) [worker 1] episodes_seen=1740 last_return=-82.3 (+1 eps) [worker 0] episodes_seen=1780 last_return=-87.9 (+1 eps) [worker 1] episodes_seen=1750 last_return=-110.7 (+1 eps) [worker 0] episodes_seen=1790 last_return=-110.6 (+1 eps) [worker 1] episodes_seen=1760 last_return=-74.1 (+1 eps) [worker 0] episodes_seen=1800 last_return=-143.7 (+1 eps) [worker 1] episodes_seen=1770 last_return=-105.1 (+1 eps) [worker 0] episodes_seen=1810 last_return=-71.2 (+1 eps) [worker 1] episodes_seen=1780 last_return=-113.2 (+1 eps) [worker 0] episodes_seen=1820 last_return=-142.6 (+1 eps) [worker 1] episodes_seen=1790 last_return=-88.2 (+1 eps) [worker 0] episodes_seen=1830 last_return=-128.5 (+1 eps) [worker 1] episodes_seen=1800 last_return=-131.5 (+1 eps) [worker 0] episodes_seen=1840 last_return=-98.3 (+1 eps) [worker 1] episodes_seen=1810 last_return=-101.6 (+1 eps) [worker 1] episodes_seen=1820 last_return=-125.9 (+1 eps) [worker 0] episodes_seen=1850 last_return=-123.1 (+1 eps) [worker 1] episodes_seen=1830 last_return=-86.3 (+1 eps) [worker 0] episodes_seen=1860 
last_return=-85.5 (+1 eps) [worker 1] episodes_seen=1840 last_return=-88.0 (+1 eps) [worker 0] episodes_seen=1870 last_return=-126.0 (+1 eps) [worker 1] episodes_seen=1850 last_return=-174.7 (+1 eps) [worker 0] episodes_seen=1880 last_return=-151.4 (+1 eps) [worker 1] episodes_seen=1860 last_return=-97.5 (+1 eps) [worker 0] episodes_seen=1890 last_return=-78.2 (+1 eps) [worker 1] episodes_seen=1870 last_return=-93.9 (+1 eps) [worker 0] episodes_seen=1900 last_return=-91.0 (+1 eps) [worker 1] episodes_seen=1880 last_return=-96.4 (+1 eps) [worker 0] episodes_seen=1910 last_return=-117.5 (+1 eps) [worker 1] episodes_seen=1890 last_return=-89.3 (+1 eps) [A2C][sync] it= 5839 steps= 350340 (+ 60) avg10=-142.45 loss=80.809 pg=-0.056 vf=134.812 H=0.719 gn=348.663 [worker 0] episodes_seen=1920 last_return=-158.0 (+1 eps) [worker 1] episodes_seen=1900 last_return=-118.8 (+1 eps) [worker 0] episodes_seen=1930 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1910 last_return=-124.1 (+1 eps) [worker 0] episodes_seen=1940 last_return=-137.8 (+1 eps) [worker 1] episodes_seen=1920 last_return=-158.3 (+1 eps) [worker 0] episodes_seen=1950 last_return=-98.5 (+1 eps) [worker 1] episodes_seen=1930 last_return=-138.8 (+1 eps) [worker 0] episodes_seen=1960 last_return=-108.0 (+1 eps) [worker 1] episodes_seen=1940 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1970 last_return=-84.3 (+1 eps) [worker 1] episodes_seen=1950 last_return=-126.1 (+1 eps) [worker 0] episodes_seen=1980 last_return=-161.0 (+1 eps) [worker 1] episodes_seen=1960 last_return=-121.0 (+1 eps) [worker 0] episodes_seen=1990 last_return=-137.2 (+1 eps) [worker 1] episodes_seen=1970 last_return=-94.2 (+1 eps) [worker 0] episodes_seen=2000 last_return=-66.5 (+1 eps) [worker 1] episodes_seen=1980 last_return=-85.9 (+1 eps) [worker 0] episodes_seen=2010 last_return=-84.5 (+1 eps) [worker 1] episodes_seen=1990 last_return=-100.3 (+1 eps) [worker 0] episodes_seen=2020 last_return=-90.8 (+1 eps) [worker 1] 
episodes_seen=2000 last_return=-103.7 (+1 eps) [worker 0] episodes_seen=2030 last_return=-223.3 (+1 eps) [worker 1] episodes_seen=2010 last_return=-168.7 (+1 eps) [worker 0] episodes_seen=2040 last_return=-80.7 (+1 eps) [worker 1] episodes_seen=2020 last_return=-102.4 (+1 eps) [worker 0] episodes_seen=2050 last_return=-183.9 (+1 eps) [worker 1] episodes_seen=2030 last_return=-82.4 (+1 eps) [worker 0] episodes_seen=2060 last_return=-273.5 (+1 eps) [worker 1] episodes_seen=2040 last_return=-133.3 (+1 eps) [worker 0] episodes_seen=2070 last_return=-89.3 (+1 eps) [worker 0] episodes_seen=2080 last_return=-94.4 (+1 eps) [worker 1] episodes_seen=2050 last_return=-89.9 (+1 eps) [worker 0] episodes_seen=2090 last_return=-58.3 (+1 eps) [worker 1] episodes_seen=2060 last_return=-74.9 (+1 eps) [worker 0] episodes_seen=2100 last_return=-47.6 (+1 eps) [worker 1] episodes_seen=2070 last_return=-78.2 (+1 eps) [worker 0] episodes_seen=2110 last_return=-96.6 (+1 eps) [worker 1] episodes_seen=2080 last_return=-93.9 (+1 eps) [worker 0] episodes_seen=2120 last_return=6.2 (+1 eps) [worker 1] episodes_seen=2090 last_return=-76.2 (+1 eps) [worker 0] episodes_seen=2130 last_return=-148.3 (+1 eps) [worker 1] episodes_seen=2100 last_return=-98.1 (+1 eps) [worker 0] episodes_seen=2140 last_return=-86.6 (+1 eps) [worker 1] episodes_seen=2110 last_return=-89.1 (+1 eps) [worker 0] episodes_seen=2150 last_return=-83.6 (+1 eps) [A2C][sync] it= 6673 steps= 400380 (+ 60) avg10=-113.96 loss=15.009 pg=-0.063 vf=25.157 H=0.755 gn=362.362 [worker 1] episodes_seen=2120 last_return=-68.5 (+1 eps) [worker 0] episodes_seen=2160 last_return=-86.9 (+1 eps) [worker 1] episodes_seen=2130 last_return=-140.3 (+1 eps) [worker 1] episodes_seen=2140 last_return=-233.6 (+1 eps) [worker 0] episodes_seen=2170 last_return=-192.8 (+1 eps) [worker 1] episodes_seen=2150 last_return=-203.0 (+1 eps) [worker 0] episodes_seen=2180 last_return=-107.0 (+1 eps) [worker 1] episodes_seen=2160 last_return=-79.5 (+1 eps) [worker 0] 
episodes_seen=2190 last_return=-120.8 (+1 eps) [worker 1] episodes_seen=2170 last_return=-139.5 (+1 eps) [worker 0] episodes_seen=2200 last_return=-86.5 (+1 eps) [worker 1] episodes_seen=2180 last_return=-99.0 (+1 eps) [worker 0] episodes_seen=2210 last_return=-122.4 (+1 eps) [worker 1] episodes_seen=2190 last_return=-90.9 (+1 eps) [worker 0] episodes_seen=2220 last_return=-75.2 (+1 eps) [worker 1] episodes_seen=2200 last_return=-90.8 (+1 eps) [worker 1] episodes_seen=2210 last_return=-87.3 (+1 eps) [worker 0] episodes_seen=2230 last_return=-82.9 (+1 eps) [worker 0] episodes_seen=2240 last_return=-98.6 (+1 eps) [worker 1] episodes_seen=2220 last_return=-91.4 (+1 eps) [worker 0] episodes_seen=2250 last_return=-52.9 (+1 eps) [worker 1] episodes_seen=2230 last_return=-118.8 (+1 eps) [worker 0] episodes_seen=2260 last_return=-119.0 (+1 eps) [worker 1] episodes_seen=2240 last_return=-218.9 (+1 eps) [worker 0] episodes_seen=2270 last_return=-129.9 (+1 eps) [worker 1] episodes_seen=2250 last_return=-55.3 (+1 eps) [worker 1] episodes_seen=2260 last_return=-116.0 (+1 eps) [worker 0] episodes_seen=2280 last_return=-96.4 (+1 eps) [worker 1] episodes_seen=2270 last_return=-107.0 (+1 eps) [worker 0] episodes_seen=2290 last_return=-85.9 (+1 eps) [worker 1] episodes_seen=2280 last_return=-87.7 (+1 eps) [worker 0] episodes_seen=2300 last_return=-195.7 (+1 eps) [worker 1] episodes_seen=2290 last_return=-108.9 (+1 eps) [worker 0] episodes_seen=2310 last_return=-112.1 (+1 eps) [worker 1] episodes_seen=2300 last_return=-93.2 (+1 eps) [worker 0] episodes_seen=2320 last_return=-47.3 (+1 eps) [worker 1] episodes_seen=2310 last_return=-262.4 (+1 eps) [worker 0] episodes_seen=2330 last_return=-117.7 (+1 eps) [A2C][sync] it= 7507 steps= 450420 (+ 60) avg10= -99.99 loss=12.801 pg=0.467 vf=20.592 H=0.712 gn=200.852 [worker 1] episodes_seen=2320 last_return=-102.8 (+1 eps) [worker 1] episodes_seen=2330 last_return=-110.7 (+1 eps) [worker 0] episodes_seen=2340 last_return=-100.0 (+1 eps) 
[worker 1] episodes_seen=2340 last_return=-110.1 (+1 eps) [worker 1] episodes_seen=2350 last_return=-86.8 (+1 eps) [worker 0] episodes_seen=2350 last_return=-77.8 (+1 eps) [worker 1] episodes_seen=2360 last_return=-75.3 (+1 eps) [worker 0] episodes_seen=2360 last_return=-94.3 (+1 eps) [worker 1] episodes_seen=2370 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2370 last_return=-56.9 (+1 eps) [worker 0] episodes_seen=2380 last_return=-279.8 (+1 eps) [worker 1] episodes_seen=2380 last_return=-195.3 (+1 eps) [worker 0] episodes_seen=2390 last_return=-125.4 (+1 eps) [worker 1] episodes_seen=2390 last_return=-147.5 (+1 eps) [worker 0] episodes_seen=2400 last_return=-95.7 (+1 eps) [worker 1] episodes_seen=2400 last_return=-181.0 (+1 eps) [worker 1] episodes_seen=2410 last_return=-116.7 (+1 eps) [worker 0] episodes_seen=2410 last_return=-84.4 (+1 eps) [worker 1] episodes_seen=2420 last_return=-94.4 (+1 eps) [worker 0] episodes_seen=2420 last_return=-77.5 (+1 eps) [worker 1] episodes_seen=2430 last_return=-81.8 (+1 eps) [worker 0] episodes_seen=2430 last_return=-35.2 (+1 eps) [worker 1] episodes_seen=2440 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2440 last_return=-49.1 (+1 eps) [worker 1] episodes_seen=2450 last_return=-68.1 (+1 eps) [worker 0] episodes_seen=2450 last_return=-77.4 (+1 eps) [worker 1] episodes_seen=2460 last_return=-81.0 (+1 eps) [worker 0] episodes_seen=2460 last_return=-66.1 (+1 eps) [worker 1] episodes_seen=2470 last_return=-80.6 (+1 eps) [worker 1] episodes_seen=2480 last_return=-82.1 (+1 eps) [worker 0] episodes_seen=2470 last_return=-64.4 (+1 eps) [worker 1] episodes_seen=2490 last_return=-17.0 (+1 eps) [worker 0] episodes_seen=2480 last_return=-95.5 (+1 eps) [A2C][sync] it= 8334 steps= 500040 (+ 60) avg10= -58.73 loss=99.849 pg=0.012 vf=166.454 H=1.180 gn=602.660 [Checkpoint] Saved self-describing checkpoint → part2_artifacts/checkpoints/a2c_run4_seed1227.pth [A2C][sync] done: steps=500040 time=445.7s avg10=-58.73
[Run run4_seed1227] checkpoint: part2_artifacts/checkpoints/a2c_run4_seed1227.pth [Run run4_seed1227] training plot (tail 500): part2_artifacts/train_curve_run4_seed1227.png [Run run4_seed1227] training plot (full): part2_artifacts/train_curve_full_run4_seed1227.png [Run run4_seed1227] per-worker plot: part2_artifacts/train_curve_workers_run4_seed1227.png [Run run4_seed1227] workers-average plot: part2_artifacts/train_curve_workers_avg_run4_seed1227.png
[Eval run4_seed1227] mean=-108.12 std=23.96 min=-155.93 max=-78.06 [Eval run4_seed1227] CSV: part2_artifacts/eval10_run4_seed1227.csv [Eval run4_seed1227] plot: part2_artifacts/eval10_run4_seed1227.png [Best] ep=2 return=-78.06 seed=1229
/usr/local/lib/python3.12/dist-packages/gymnasium/wrappers/rendering.py:293: UserWarning: WARN: Overwriting existing videos at /content/part2_artifacts/videos/run4_seed1227 folder (try specifying a different `video_folder` for the `RecordVideo` wrapper if this is not desired)
logger.warn(
[Video run4_seed1227] episode return=-78.06 [Video run4_seed1227] saved under: part2_artifacts/videos run4_seed1227 | mean=-108.1±24.0 | best_ep=2, best_ret=-78.1
Run#5
# ---------------------------------------------------------------------------
# Run 5: train -> evaluate -> record, all tagged with the same run identifier.
# ---------------------------------------------------------------------------
run_id = f"run5_seed{SEED}"

# Hyperparameters for this run, gathered in one mapping so they can be read
# (and compared against the other runs) at a glance.
_run5_hparams = dict(
    n_workers=4,              # number of parallel workers
    total_env_steps=800_000,  # training budget in environment steps
    T=30,                     # presumably the per-worker rollout length per update
    gamma=0.99,
    entropy_coef=0.03,
    value_coef=0.50,
    max_grad_norm=0.5,
    lr=7e-4,
    log_every=40_000,         # progress-log interval, in environment steps
)

# Stage 1 - multi-worker training; this also saves the checkpoint and plots.
model, logs, paths = train_once(run_id=run_id, **_run5_hparams)

# Stage 2 - greedy evaluation over 10 fixed-seed episodes of the saved checkpoint.
metrics, eval_paths = evaluate_10(run_id, paths.ckpt_path)

# Stage 3 - replay the best of those evaluation episodes (by its seed) on video.
video_dir, best_idx, best_ret = record_best_from_eval(run_id, paths.ckpt_path)

# One-line summary of the run.
print(f"{run_id} | mean={metrics['mean']:.1f}±{metrics['std']:.1f} | best_ep={best_idx}, best_ret={best_ret:.1f}")
[Run run5_seed1227] starting training… [A2C][sync] start: workers=4, T=30, target_steps=800000, mp=fork [A2C][sync] it= 1 steps= 120 (+120) avg10= nan loss=54.319 pg=0.000 vf=108.721 H=1.386 gn=14.974 [worker 0] episodes_seen=10 last_return=-178.6 (+1 eps) [worker 2] episodes_seen=10 last_return=-306.1 (+1 eps) [worker 1] episodes_seen=10 last_return=-244.3 (+1 eps) [worker 3] episodes_seen=10 last_return=-136.7 (+1 eps) [worker 0] episodes_seen=20 last_return=-201.4 (+1 eps) [worker 1] episodes_seen=20 last_return=-173.8 (+1 eps) [worker 3] episodes_seen=20 last_return=-319.9 (+1 eps) [worker 2] episodes_seen=20 last_return=-202.1 (+1 eps) [worker 1] episodes_seen=30 last_return=-250.5 (+1 eps) [worker 0] episodes_seen=30 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=30 last_return=-295.5 (+1 eps) [worker 2] episodes_seen=30 last_return=-248.7 (+1 eps) [worker 0] episodes_seen=40 last_return=-345.7 (+1 eps) [worker 3] episodes_seen=40 last_return=-148.7 (+1 eps) [worker 1] episodes_seen=40 last_return=-325.9 (+1 eps) [worker 2] episodes_seen=40 last_return=-207.0 (+1 eps) [worker 2] episodes_seen=50 last_return=-322.6 (+1 eps) [worker 1] episodes_seen=50 last_return=-313.1 (+1 eps) [worker 0] episodes_seen=50 last_return=-220.6 (+1 eps) [worker 3] episodes_seen=50 last_return=-427.4 (+1 eps) [worker 2] episodes_seen=60 last_return=-155.7 (+1 eps) [worker 1] episodes_seen=60 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=60 last_return=-275.0 (+1 eps) [worker 3] episodes_seen=60 last_return=-437.0 (+1 eps) [worker 2] episodes_seen=70 last_return=-208.3 (+1 eps) [worker 0] episodes_seen=70 last_return=-389.0 (+1 eps) [worker 1] episodes_seen=70 last_return=-232.7 (+1 eps) [worker 3] episodes_seen=70 last_return=-263.0 (+1 eps) [worker 2] episodes_seen=80 last_return=-128.7 (+1 eps) [worker 0] episodes_seen=80 last_return=-55.5 (+1 eps) [worker 3] episodes_seen=80 last_return=-459.8 (+1 eps) [worker 1] episodes_seen=80 last_return=-423.8 (+1 eps) [worker 
2] episodes_seen=90 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=90 last_return=-349.3 (+1 eps) [worker 3] episodes_seen=90 last_return=-155.2 (+1 eps) [worker 1] episodes_seen=90 last_return=-279.5 (+1 eps) [worker 2] episodes_seen=100 last_return=-286.5 (+1 eps) [worker 3] episodes_seen=100 last_return=-337.3 (+1 eps) [worker 0] episodes_seen=100 last_return=-250.4 (+1 eps) [worker 1] episodes_seen=100 last_return=-359.4 (+1 eps) [worker 2] episodes_seen=110 last_return=-332.8 (+1 eps) [worker 3] episodes_seen=110 last_return=-117.4 (+1 eps) [worker 1] episodes_seen=110 last_return=-130.8 (+1 eps) [worker 0] episodes_seen=110 last_return=-236.8 (+1 eps) [worker 3] episodes_seen=120 last_return=-226.8 (+1 eps) [worker 2] episodes_seen=120 last_return=-256.9 (+1 eps) [worker 1] episodes_seen=120 last_return=-266.8 (+1 eps) [worker 0] episodes_seen=120 last_return=-318.0 (+1 eps) [worker 2] episodes_seen=130 last_return=-100.5 (+1 eps) [worker 3] episodes_seen=130 last_return=-84.1 (+1 eps) [worker 1] episodes_seen=130 last_return=-322.6 (+1 eps) [worker 0] episodes_seen=130 last_return=-325.8 (+1 eps) [worker 2] episodes_seen=140 last_return=-104.7 (+1 eps) [worker 3] episodes_seen=140 last_return=-93.3 (+1 eps) [worker 1] episodes_seen=140 last_return=-95.7 (+1 eps) [worker 0] episodes_seen=140 last_return=-67.4 (+1 eps) [A2C][sync] it= 335 steps= 40200 (+120) avg10=-102.86 loss=626.798 pg=-0.133 vf=1253.870 H=0.148 gn=2806.643 [worker 2] episodes_seen=150 last_return=-91.4 (+1 eps) [worker 3] episodes_seen=150 last_return=-128.0 (+1 eps) [worker 1] episodes_seen=150 last_return=-208.7 (+1 eps) [worker 0] episodes_seen=150 last_return=-107.7 (+1 eps) [worker 3] episodes_seen=160 last_return=-270.2 (+1 eps) [worker 2] episodes_seen=160 last_return=-143.9 (+1 eps) [worker 1] episodes_seen=160 last_return=20.7 (+1 eps) [worker 0] episodes_seen=160 last_return=-254.0 (+1 eps) [worker 3] episodes_seen=170 last_return=-153.8 (+1 eps) [worker 2] episodes_seen=170 
last_return=-271.7 (+1 eps) [worker 1] episodes_seen=170 last_return=-316.1 (+1 eps) [worker 0] episodes_seen=170 last_return=-102.6 (+1 eps) [worker 3] episodes_seen=180 last_return=-149.4 (+1 eps) [worker 2] episodes_seen=180 last_return=-351.7 (+1 eps) [worker 0] episodes_seen=180 last_return=-116.5 (+1 eps) [worker 1] episodes_seen=180 last_return=-289.8 (+1 eps) [worker 3] episodes_seen=190 last_return=-367.0 (+1 eps) [worker 2] episodes_seen=190 last_return=-450.5 (+1 eps) [worker 1] episodes_seen=190 last_return=-248.2 (+1 eps) [worker 0] episodes_seen=190 last_return=-253.4 (+1 eps) [worker 3] episodes_seen=200 last_return=47.5 (+1 eps) [worker 1] episodes_seen=200 last_return=-383.1 (+1 eps) [worker 2] episodes_seen=200 last_return=-105.0 (+1 eps) [worker 0] episodes_seen=200 last_return=-327.2 (+1 eps) [worker 3] episodes_seen=210 last_return=-327.5 (+1 eps) [worker 2] episodes_seen=210 last_return=-330.7 (+1 eps) [worker 0] episodes_seen=210 last_return=-222.4 (+1 eps) [worker 1] episodes_seen=210 last_return=-115.4 (+1 eps) [worker 3] episodes_seen=220 last_return=-336.9 (+1 eps) [worker 1] episodes_seen=220 last_return=-348.2 (+1 eps) [worker 2] episodes_seen=220 last_return=-210.0 (+1 eps) [worker 0] episodes_seen=220 last_return=-145.0 (+1 eps) [worker 3] episodes_seen=230 last_return=-156.5 (+1 eps) [worker 2] episodes_seen=230 last_return=-294.5 (+1 eps) [worker 0] episodes_seen=230 last_return=-245.6 (+1 eps) [worker 1] episodes_seen=230 last_return=-420.2 (+1 eps) [worker 3] episodes_seen=240 last_return=-315.4 (+1 eps) [worker 1] episodes_seen=240 last_return=-188.4 (+1 eps) [worker 2] episodes_seen=240 last_return=-363.4 (+1 eps) [worker 0] episodes_seen=240 last_return=-92.2 (+1 eps) [worker 3] episodes_seen=250 last_return=-449.0 (+1 eps) [worker 0] episodes_seen=250 last_return=-402.4 (+1 eps) [worker 1] episodes_seen=250 last_return=-115.1 (+1 eps) [worker 2] episodes_seen=250 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=260 
last_return=-376.0 (+1 eps) [worker 0] episodes_seen=260 last_return=-129.8 (+1 eps) [worker 1] episodes_seen=260 last_return=-168.6 (+1 eps) [worker 2] episodes_seen=260 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=270 last_return=-335.6 (+1 eps) [worker 1] episodes_seen=270 last_return=-355.9 (+1 eps) [worker 0] episodes_seen=270 last_return=-380.8 (+1 eps) [worker 2] episodes_seen=270 last_return=-97.3 (+1 eps) [worker 3] episodes_seen=280 last_return=-240.2 (+1 eps) [worker 0] episodes_seen=280 last_return=-252.8 (+1 eps) [worker 1] episodes_seen=280 last_return=-338.2 (+1 eps) [worker 2] episodes_seen=280 last_return=-286.9 (+1 eps) [worker 3] episodes_seen=290 last_return=-176.5 (+1 eps) [worker 0] episodes_seen=290 last_return=-205.3 (+1 eps) [worker 1] episodes_seen=290 last_return=-352.1 (+1 eps) [worker 2] episodes_seen=290 last_return=-156.6 (+1 eps) [A2C][sync] it= 669 steps= 80280 (+120) avg10=-297.85 loss=428.193 pg=-0.001 vf=856.389 H=0.005 gn=3644.006 [worker 3] episodes_seen=300 last_return=-168.3 (+1 eps) [worker 0] episodes_seen=300 last_return=-257.9 (+1 eps) [worker 1] episodes_seen=300 last_return=-197.7 (+1 eps) [worker 2] episodes_seen=300 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=310 last_return=-544.0 (+1 eps) [worker 0] episodes_seen=310 last_return=-27.9 (+1 eps) [worker 1] episodes_seen=310 last_return=-249.9 (+1 eps) [worker 2] episodes_seen=310 last_return=-320.1 (+1 eps) [worker 3] episodes_seen=320 last_return=-312.9 (+1 eps) [worker 1] episodes_seen=320 last_return=-188.8 (+1 eps) [worker 0] episodes_seen=320 last_return=-202.1 (+1 eps) [worker 2] episodes_seen=320 last_return=-341.9 (+1 eps) [worker 3] episodes_seen=330 last_return=-167.4 (+1 eps) [worker 1] episodes_seen=330 last_return=-299.0 (+1 eps) [worker 0] episodes_seen=330 last_return=-341.3 (+1 eps) [worker 2] episodes_seen=330 last_return=-109.0 (+1 eps) [worker 3] episodes_seen=340 last_return=-298.7 (+1 eps) [worker 1] episodes_seen=340 
last_return=-283.7 (+1 eps) [worker 0] episodes_seen=340 last_return=-220.1 (+1 eps) [worker 2] episodes_seen=340 last_return=-219.0 (+1 eps) [worker 3] episodes_seen=350 last_return=-216.0 (+1 eps) [worker 1] episodes_seen=350 last_return=-355.0 (+1 eps) [worker 0] episodes_seen=350 last_return=-293.5 (+1 eps) [worker 2] episodes_seen=350 last_return=-240.4 (+1 eps) [worker 3] episodes_seen=360 last_return=-313.5 (+1 eps) [worker 1] episodes_seen=360 last_return=-86.5 (+1 eps) [worker 0] episodes_seen=360 last_return=-194.3 (+1 eps) [worker 2] episodes_seen=360 last_return=-165.2 (+1 eps) [worker 3] episodes_seen=370 last_return=-275.1 (+1 eps) [worker 1] episodes_seen=370 last_return=-308.5 (+1 eps) [worker 0] episodes_seen=370 last_return=-336.8 (+1 eps) [worker 2] episodes_seen=370 last_return=-347.1 (+1 eps) [worker 3] episodes_seen=380 last_return=-207.4 (+1 eps) [worker 1] episodes_seen=380 last_return=-137.8 (+1 eps) [worker 0] episodes_seen=380 last_return=-183.4 (+1 eps) [worker 2] episodes_seen=380 last_return=-274.3 (+1 eps) [worker 3] episodes_seen=390 last_return=-426.9 (+1 eps) [worker 0] episodes_seen=390 last_return=-341.5 (+1 eps) [worker 1] episodes_seen=390 last_return=-255.6 (+1 eps) [worker 2] episodes_seen=390 last_return=-164.6 (+1 eps) [worker 3] episodes_seen=400 last_return=-289.4 (+1 eps) [worker 0] episodes_seen=400 last_return=-193.9 (+1 eps) [worker 1] episodes_seen=400 last_return=-263.4 (+1 eps) [worker 2] episodes_seen=400 last_return=-351.0 (+1 eps) [worker 3] episodes_seen=410 last_return=-291.0 (+1 eps) [worker 1] episodes_seen=410 last_return=-114.7 (+1 eps) [worker 0] episodes_seen=410 last_return=-88.0 (+1 eps) [worker 2] episodes_seen=410 last_return=-241.6 (+1 eps) [worker 3] episodes_seen=420 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=420 last_return=-234.4 (+1 eps) [worker 1] episodes_seen=420 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=420 last_return=-221.5 (+1 eps) [worker 3] episodes_seen=430 
last_return=-367.5 (+1 eps) [worker 0] episodes_seen=430 last_return=-197.1 (+1 eps) [worker 1] episodes_seen=430 last_return=-271.5 (+1 eps) [worker 2] episodes_seen=430 last_return=-345.4 (+1 eps) [worker 3] episodes_seen=440 last_return=-168.9 (+1 eps) [worker 0] episodes_seen=440 last_return=-344.8 (+1 eps) [worker 1] episodes_seen=440 last_return=-343.2 (+1 eps) [worker 2] episodes_seen=440 last_return=-379.7 (+1 eps) [A2C][sync] it= 1003 steps= 120360 (+120) avg10=-300.65 loss=6102.923 pg=-0.077 vf=12206.021 H=0.338 gn=16803.328 [worker 3] episodes_seen=450 last_return=-177.0 (+1 eps) [worker 0] episodes_seen=450 last_return=-427.3 (+1 eps) [worker 1] episodes_seen=450 last_return=-202.3 (+1 eps) [worker 2] episodes_seen=450 last_return=-272.6 (+1 eps) [worker 3] episodes_seen=460 last_return=-230.4 (+1 eps) [worker 1] episodes_seen=460 last_return=-150.5 (+1 eps) [worker 0] episodes_seen=460 last_return=-122.6 (+1 eps) [worker 2] episodes_seen=460 last_return=-327.6 (+1 eps) [worker 3] episodes_seen=470 last_return=-114.4 (+1 eps) [worker 1] episodes_seen=470 last_return=-196.5 (+1 eps) [worker 0] episodes_seen=470 last_return=-304.6 (+1 eps) [worker 2] episodes_seen=470 last_return=-439.2 (+1 eps) [worker 3] episodes_seen=480 last_return=-257.2 (+1 eps) [worker 1] episodes_seen=480 last_return=-176.2 (+1 eps) [worker 0] episodes_seen=480 last_return=-218.5 (+1 eps) [worker 2] episodes_seen=480 last_return=-389.8 (+1 eps) [worker 1] episodes_seen=490 last_return=-285.3 (+1 eps) [worker 0] episodes_seen=490 last_return=-307.8 (+1 eps) [worker 2] episodes_seen=490 last_return=-214.0 (+1 eps) [worker 1] episodes_seen=500 last_return=-286.0 (+1 eps) [worker 0] episodes_seen=500 last_return=-370.1 (+1 eps) [worker 3] episodes_seen=490 last_return=-268.9 (+1 eps) [worker 2] episodes_seen=500 last_return=-139.0 (+1 eps) [worker 1] episodes_seen=510 last_return=-151.5 (+1 eps) [worker 0] episodes_seen=510 last_return=-451.6 (+1 eps) [worker 3] episodes_seen=500 
last_return=-182.6 (+1 eps) [worker 2] episodes_seen=510 last_return=-250.0 (+1 eps) [worker 1] episodes_seen=520 last_return=-170.7 (+1 eps) [worker 0] episodes_seen=520 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=510 last_return=-363.2 (+1 eps) [worker 2] episodes_seen=520 last_return=-291.0 (+1 eps) [worker 1] episodes_seen=530 last_return=-234.4 (+1 eps) [worker 0] episodes_seen=530 last_return=-450.4 (+1 eps) [worker 3] episodes_seen=520 last_return=-156.4 (+1 eps) [worker 2] episodes_seen=530 last_return=-212.3 (+1 eps) [worker 0] episodes_seen=540 last_return=-355.8 (+1 eps) [worker 1] episodes_seen=540 last_return=-305.8 (+1 eps) [worker 3] episodes_seen=530 last_return=-23.7 (+1 eps) [worker 2] episodes_seen=540 last_return=-144.1 (+1 eps) [worker 0] episodes_seen=550 last_return=-287.1 (+1 eps) [worker 1] episodes_seen=550 last_return=-179.7 (+1 eps) [worker 3] episodes_seen=540 last_return=-181.1 (+1 eps) [worker 2] episodes_seen=550 last_return=-444.4 (+1 eps) [worker 0] episodes_seen=560 last_return=-612.5 (+1 eps) [worker 1] episodes_seen=560 last_return=-205.9 (+1 eps) [worker 3] episodes_seen=550 last_return=-333.1 (+1 eps) [worker 2] episodes_seen=560 last_return=-184.4 (+1 eps) [worker 0] episodes_seen=570 last_return=-415.0 (+1 eps) [worker 1] episodes_seen=570 last_return=-113.3 (+1 eps) [worker 3] episodes_seen=560 last_return=-221.1 (+1 eps) [worker 2] episodes_seen=570 last_return=-159.7 (+1 eps) [worker 0] episodes_seen=580 last_return=-174.9 (+1 eps) [worker 1] episodes_seen=580 last_return=-241.1 (+1 eps) [worker 3] episodes_seen=570 last_return=-357.0 (+1 eps) [worker 2] episodes_seen=580 last_return=-341.8 (+1 eps) [worker 0] episodes_seen=590 last_return=-142.4 (+1 eps) [worker 1] episodes_seen=590 last_return=-131.9 (+1 eps) [A2C][sync] it= 1337 steps= 160440 (+120) avg10=-281.80 loss=189.340 pg=-0.000 vf=378.681 H=0.006 gn=1486.016 [worker 3] episodes_seen=580 last_return=-214.7 (+1 eps) [worker 2] episodes_seen=590 
last_return=-221.9 (+1 eps) [worker 0] episodes_seen=600 last_return=-94.8 (+1 eps) [worker 1] episodes_seen=600 last_return=-152.7 (+1 eps) [worker 3] episodes_seen=590 last_return=-66.5 (+1 eps) [worker 2] episodes_seen=600 last_return=70.6 (+1 eps) [worker 0] episodes_seen=610 last_return=-214.8 (+1 eps) [worker 1] episodes_seen=610 last_return=-123.6 (+1 eps) [worker 3] episodes_seen=600 last_return=-391.0 (+1 eps) [worker 0] episodes_seen=620 last_return=-337.9 (+1 eps) [worker 2] episodes_seen=610 last_return=-61.9 (+1 eps) [worker 1] episodes_seen=620 last_return=-275.0 (+1 eps) [worker 3] episodes_seen=610 last_return=-253.1 (+1 eps) [worker 2] episodes_seen=620 last_return=-283.3 (+1 eps) [worker 0] episodes_seen=630 last_return=-449.4 (+1 eps) [worker 1] episodes_seen=630 last_return=-246.5 (+1 eps) [worker 3] episodes_seen=620 last_return=-166.5 (+1 eps) [worker 0] episodes_seen=640 last_return=-220.7 (+1 eps) [worker 2] episodes_seen=630 last_return=-147.6 (+1 eps) [worker 1] episodes_seen=640 last_return=-118.7 (+1 eps) [worker 3] episodes_seen=630 last_return=-217.0 (+1 eps) [worker 2] episodes_seen=640 last_return=-191.3 (+1 eps) [worker 0] episodes_seen=650 last_return=-311.3 (+1 eps) [worker 1] episodes_seen=650 last_return=-252.5 (+1 eps) [worker 3] episodes_seen=640 last_return=-362.7 (+1 eps) [worker 2] episodes_seen=650 last_return=-323.8 (+1 eps) [worker 0] episodes_seen=660 last_return=-298.0 (+1 eps) [worker 1] episodes_seen=660 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=650 last_return=-463.9 (+1 eps) [worker 0] episodes_seen=670 last_return=-354.9 (+1 eps) [worker 2] episodes_seen=660 last_return=-196.3 (+1 eps) [worker 1] episodes_seen=670 last_return=-162.9 (+1 eps) [worker 3] episodes_seen=660 last_return=-260.8 (+1 eps) [worker 0] episodes_seen=680 last_return=-241.4 (+1 eps) [worker 2] episodes_seen=670 last_return=-156.2 (+1 eps) [worker 1] episodes_seen=680 last_return=-380.0 (+1 eps) [worker 3] episodes_seen=670 
last_return=-192.0 (+1 eps) [worker 0] episodes_seen=690 last_return=-330.4 (+1 eps) [worker 1] episodes_seen=690 last_return=-442.4 (+1 eps) [worker 2] episodes_seen=680 last_return=-255.4 (+1 eps) [worker 3] episodes_seen=680 last_return=-424.4 (+1 eps) [worker 0] episodes_seen=700 last_return=-522.4 (+1 eps) [worker 2] episodes_seen=690 last_return=-397.0 (+1 eps) [worker 1] episodes_seen=700 last_return=-398.0 (+1 eps) [worker 3] episodes_seen=690 last_return=-114.3 (+1 eps) [worker 0] episodes_seen=710 last_return=-209.2 (+1 eps) [worker 1] episodes_seen=710 last_return=-222.6 (+1 eps) [worker 2] episodes_seen=700 last_return=-429.9 (+1 eps) [worker 3] episodes_seen=700 last_return=-177.0 (+1 eps) [worker 0] episodes_seen=720 last_return=-333.6 (+1 eps) [worker 2] episodes_seen=710 last_return=-382.3 (+1 eps) [worker 1] episodes_seen=720 last_return=-131.8 (+1 eps) [worker 3] episodes_seen=710 last_return=-109.4 (+1 eps) [worker 0] episodes_seen=730 last_return=-268.6 (+1 eps) [worker 2] episodes_seen=720 last_return=-240.7 (+1 eps) [worker 1] episodes_seen=730 last_return=-340.2 (+1 eps) [worker 3] episodes_seen=720 last_return=-122.6 (+1 eps) [worker 2] episodes_seen=730 last_return=-263.1 (+1 eps) [worker 0] episodes_seen=740 last_return=-135.6 (+1 eps) [worker 1] episodes_seen=740 last_return=-192.8 (+1 eps) [A2C][sync] it= 1671 steps= 200520 (+120) avg10=-267.61 loss=402.816 pg=0.002 vf=805.629 H=0.015 gn=2333.945 [worker 3] episodes_seen=730 last_return=-276.8 (+1 eps) [worker 0] episodes_seen=750 last_return=-334.7 (+1 eps) [worker 2] episodes_seen=740 last_return=-293.4 (+1 eps) [worker 3] episodes_seen=740 last_return=-181.9 (+1 eps) [worker 0] episodes_seen=760 last_return=-212.4 (+1 eps) [worker 2] episodes_seen=750 last_return=-300.0 (+1 eps) [worker 1] episodes_seen=750 last_return=-293.6 (+1 eps) [worker 3] episodes_seen=750 last_return=-347.7 (+1 eps) [worker 0] episodes_seen=770 last_return=-212.7 (+1 eps) [worker 2] episodes_seen=760 
last_return=-212.7 (+1 eps) [worker 1] episodes_seen=760 last_return=-233.6 (+1 eps) [worker 3] episodes_seen=760 last_return=-251.5 (+1 eps) [worker 0] episodes_seen=780 last_return=-440.6 (+1 eps) [worker 2] episodes_seen=770 last_return=39.5 (+1 eps) [worker 1] episodes_seen=770 last_return=-139.5 (+1 eps) [worker 3] episodes_seen=770 last_return=-262.3 (+1 eps) [worker 0] episodes_seen=790 last_return=-301.3 (+1 eps) [worker 2] episodes_seen=780 last_return=-117.4 (+1 eps) [worker 1] episodes_seen=780 last_return=-75.1 (+1 eps) [worker 3] episodes_seen=780 last_return=-192.8 (+1 eps) [worker 0] episodes_seen=800 last_return=-127.5 (+1 eps) [worker 1] episodes_seen=790 last_return=-96.8 (+1 eps) [worker 2] episodes_seen=790 last_return=-216.7 (+1 eps) [worker 3] episodes_seen=790 last_return=-137.9 (+1 eps) [worker 1] episodes_seen=800 last_return=-225.3 (+1 eps) [worker 0] episodes_seen=810 last_return=-152.5 (+1 eps) [worker 2] episodes_seen=800 last_return=-106.4 (+1 eps) [worker 3] episodes_seen=800 last_return=-311.3 (+1 eps) [worker 0] episodes_seen=820 last_return=-115.9 (+1 eps) [worker 1] episodes_seen=810 last_return=-322.8 (+1 eps) [worker 2] episodes_seen=810 last_return=-216.4 (+1 eps) [worker 3] episodes_seen=810 last_return=-298.1 (+1 eps) [worker 0] episodes_seen=830 last_return=-190.0 (+1 eps) [worker 2] episodes_seen=820 last_return=-134.4 (+1 eps) [worker 1] episodes_seen=820 last_return=-49.7 (+1 eps) [worker 3] episodes_seen=820 last_return=-282.2 (+1 eps) [worker 0] episodes_seen=840 last_return=-187.4 (+1 eps) [worker 1] episodes_seen=830 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=830 last_return=-16.4 (+1 eps) [worker 3] episodes_seen=830 last_return=-58.7 (+1 eps) [worker 0] episodes_seen=850 last_return=-181.9 (+1 eps) [worker 1] episodes_seen=840 last_return=-125.1 (+1 eps) [worker 2] episodes_seen=840 last_return=-91.4 (+1 eps) [worker 3] episodes_seen=840 last_return=-236.8 (+1 eps) [worker 0] episodes_seen=860 
last_return=-121.7 (+1 eps) [worker 2] episodes_seen=850 last_return=-134.9 (+1 eps) [worker 1] episodes_seen=850 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=850 last_return=-72.7 (+1 eps) [worker 2] episodes_seen=860 last_return=-98.3 (+1 eps) [worker 0] episodes_seen=870 last_return=-125.4 (+1 eps) [worker 1] episodes_seen=860 last_return=-90.6 (+1 eps) [worker 3] episodes_seen=860 last_return=-82.7 (+1 eps) [worker 1] episodes_seen=870 last_return=-115.1 (+1 eps) [worker 2] episodes_seen=870 last_return=-69.5 (+1 eps) [worker 0] episodes_seen=880 last_return=-99.8 (+1 eps) [A2C][sync] it= 2005 steps= 240600 (+120) avg10= -99.88 loss=262.455 pg=-0.048 vf=525.057 H=0.866 gn=1962.440 [worker 3] episodes_seen=870 last_return=-254.3 (+1 eps) [worker 0] episodes_seen=890 last_return=-110.2 (+1 eps) [worker 1] episodes_seen=880 last_return=-177.5 (+1 eps) [worker 2] episodes_seen=880 last_return=-235.0 (+1 eps) [worker 3] episodes_seen=880 last_return=-309.1 (+1 eps) [worker 0] episodes_seen=900 last_return=-397.4 (+1 eps) [worker 2] episodes_seen=890 last_return=-344.8 (+1 eps) [worker 1] episodes_seen=890 last_return=-114.9 (+1 eps) [worker 3] episodes_seen=890 last_return=-495.4 (+1 eps) [worker 2] episodes_seen=900 last_return=-187.2 (+1 eps) [worker 0] episodes_seen=910 last_return=-234.3 (+1 eps) [worker 1] episodes_seen=900 last_return=-111.3 (+1 eps) [worker 3] episodes_seen=900 last_return=-92.0 (+1 eps) [worker 1] episodes_seen=910 last_return=-92.6 (+1 eps) [worker 0] episodes_seen=920 last_return=74.2 (+1 eps) [worker 2] episodes_seen=910 last_return=-94.4 (+1 eps) [worker 3] episodes_seen=910 last_return=-99.5 (+1 eps) [worker 1] episodes_seen=920 last_return=-251.0 (+1 eps) [worker 0] episodes_seen=930 last_return=-70.0 (+1 eps) [worker 2] episodes_seen=920 last_return=-101.3 (+1 eps) [worker 3] episodes_seen=920 last_return=-127.9 (+1 eps) [worker 1] episodes_seen=930 last_return=-146.1 (+1 eps) [worker 0] episodes_seen=940 last_return=-184.1 
(+1 eps) [worker 2] episodes_seen=930 last_return=-214.3 (+1 eps) [worker 3] episodes_seen=930 last_return=-293.6 (+1 eps) [worker 1] episodes_seen=940 last_return=-343.2 (+1 eps) [worker 0] episodes_seen=950 last_return=-360.3 (+1 eps) [worker 2] episodes_seen=940 last_return=-190.5 (+1 eps) [worker 3] episodes_seen=940 last_return=-189.2 (+1 eps) [worker 1] episodes_seen=950 last_return=-417.1 (+1 eps) [worker 2] episodes_seen=950 last_return=-113.1 (+1 eps) [worker 0] episodes_seen=960 last_return=-169.0 (+1 eps) [worker 3] episodes_seen=950 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=960 last_return=-257.7 (+1 eps) [worker 2] episodes_seen=960 last_return=-111.8 (+1 eps) [worker 0] episodes_seen=970 last_return=-211.1 (+1 eps) [worker 3] episodes_seen=960 last_return=-192.9 (+1 eps) [worker 1] episodes_seen=970 last_return=-204.6 (+1 eps) [worker 2] episodes_seen=970 last_return=-227.1 (+1 eps) [worker 0] episodes_seen=980 last_return=-325.8 (+1 eps) [worker 3] episodes_seen=970 last_return=-178.4 (+1 eps) [A2C][sync] it= 2339 steps= 280680 (+120) avg10=-216.54 loss=5854.972 pg=0.019 vf=11709.909 H=0.043 gn=46281.164 [worker 1] episodes_seen=980 last_return=-398.3 (+1 eps) [worker 0] episodes_seen=990 last_return=-237.8 (+1 eps) [worker 2] episodes_seen=980 last_return=-283.6 (+1 eps) [worker 3] episodes_seen=980 last_return=-199.8 (+1 eps) [worker 1] episodes_seen=990 last_return=-378.0 (+1 eps) [worker 0] episodes_seen=1000 last_return=-334.3 (+1 eps) [worker 2] episodes_seen=990 last_return=-149.7 (+1 eps) [worker 3] episodes_seen=990 last_return=39.4 (+1 eps) [worker 1] episodes_seen=1000 last_return=-98.4 (+1 eps) [worker 0] episodes_seen=1010 last_return=-100.9 (+1 eps) [worker 2] episodes_seen=1000 last_return=-110.3 (+1 eps) [worker 3] episodes_seen=1000 last_return=-85.1 (+1 eps) [worker 1] episodes_seen=1010 last_return=-62.4 (+1 eps) [worker 0] episodes_seen=1020 last_return=-73.8 (+1 eps) [worker 2] episodes_seen=1010 last_return=-80.8 (+1 
eps) [worker 3] episodes_seen=1010 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1020 last_return=-106.1 (+1 eps) [worker 0] episodes_seen=1030 last_return=-89.4 (+1 eps) [worker 2] episodes_seen=1020 last_return=-103.3 (+1 eps) [worker 3] episodes_seen=1020 last_return=-96.9 (+1 eps) [worker 1] episodes_seen=1030 last_return=-82.0 (+1 eps) [worker 0] episodes_seen=1040 last_return=-101.1 (+1 eps) [worker 2] episodes_seen=1030 last_return=-88.1 (+1 eps) [worker 3] episodes_seen=1030 last_return=90.7 (+1 eps) [worker 1] episodes_seen=1040 last_return=-70.1 (+1 eps) [worker 0] episodes_seen=1050 last_return=-66.8 (+1 eps) [worker 2] episodes_seen=1040 last_return=-107.3 (+1 eps) [worker 3] episodes_seen=1040 last_return=-84.5 (+1 eps) [worker 0] episodes_seen=1060 last_return=-72.4 (+1 eps) [worker 1] episodes_seen=1050 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1050 last_return=-98.1 (+1 eps) [worker 3] episodes_seen=1050 last_return=-79.8 (+1 eps) [worker 1] episodes_seen=1060 last_return=-59.8 (+1 eps) [worker 0] episodes_seen=1070 last_return=-89.1 (+1 eps) [worker 2] episodes_seen=1060 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1070 last_return=-90.2 (+1 eps) [worker 3] episodes_seen=1060 last_return=-79.8 (+1 eps) [worker 0] episodes_seen=1080 last_return=-84.0 (+1 eps) [worker 2] episodes_seen=1070 last_return=-90.0 (+1 eps) [worker 1] episodes_seen=1080 last_return=-239.1 (+1 eps) [worker 3] episodes_seen=1070 last_return=-93.1 (+1 eps) [worker 0] episodes_seen=1090 last_return=-73.9 (+1 eps) [worker 2] episodes_seen=1080 last_return=-74.6 (+1 eps) [worker 1] episodes_seen=1090 last_return=-72.9 (+1 eps) [worker 3] episodes_seen=1080 last_return=-1.2 (+1 eps) [worker 0] episodes_seen=1100 last_return=-145.2 (+1 eps) [worker 2] episodes_seen=1090 last_return=-59.1 (+1 eps) [worker 1] episodes_seen=1100 last_return=-92.4 (+1 eps) [worker 3] episodes_seen=1090 last_return=-87.7 (+1 eps) [worker 0] episodes_seen=1110 last_return=-65.5 
(+1 eps) [worker 2] episodes_seen=1100 last_return=-101.8 (+1 eps) [worker 1] episodes_seen=1110 last_return=-65.4 (+1 eps) [A2C][sync] it= 2673 steps= 320760 (+120) avg10= -77.42 loss=17.307 pg=-0.001 vf=34.616 H=0.003 gn=142.517 [worker 3] episodes_seen=1100 last_return=-83.8 (+1 eps) [worker 0] episodes_seen=1120 last_return=-101.9 (+1 eps) [worker 2] episodes_seen=1110 last_return=-75.5 (+1 eps) [worker 1] episodes_seen=1120 last_return=-110.7 (+1 eps) [worker 3] episodes_seen=1110 last_return=-74.0 (+1 eps) [worker 0] episodes_seen=1130 last_return=96.3 (+1 eps) [worker 2] episodes_seen=1120 last_return=-96.7 (+1 eps) [worker 1] episodes_seen=1130 last_return=-306.8 (+1 eps) [worker 3] episodes_seen=1120 last_return=-94.6 (+1 eps) [worker 0] episodes_seen=1140 last_return=-273.7 (+1 eps) [worker 2] episodes_seen=1130 last_return=-107.0 (+1 eps) [worker 1] episodes_seen=1140 last_return=-112.8 (+1 eps) [worker 3] episodes_seen=1130 last_return=-85.7 (+1 eps) [worker 0] episodes_seen=1150 last_return=-94.4 (+1 eps) [worker 2] episodes_seen=1140 last_return=-73.5 (+1 eps) [worker 1] episodes_seen=1150 last_return=-83.2 (+1 eps) [worker 3] episodes_seen=1140 last_return=-91.8 (+1 eps) [worker 0] episodes_seen=1160 last_return=-192.8 (+1 eps) [worker 2] episodes_seen=1150 last_return=-208.8 (+1 eps) [worker 1] episodes_seen=1160 last_return=-95.0 (+1 eps) [worker 3] episodes_seen=1150 last_return=-91.2 (+1 eps) [worker 0] episodes_seen=1170 last_return=-163.1 (+1 eps) [worker 2] episodes_seen=1160 last_return=-148.1 (+1 eps) [worker 1] episodes_seen=1170 last_return=-108.3 (+1 eps) [worker 3] episodes_seen=1160 last_return=-210.2 (+1 eps) [worker 0] episodes_seen=1180 last_return=-204.2 (+1 eps) [worker 2] episodes_seen=1170 last_return=-72.2 (+1 eps) [worker 3] episodes_seen=1170 last_return=-230.8 (+1 eps) [worker 1] episodes_seen=1180 last_return=-91.0 (+1 eps) [worker 2] episodes_seen=1180 last_return=-185.8 (+1 eps) [worker 0] episodes_seen=1190 
last_return=-137.5 (+1 eps) [worker 3] episodes_seen=1180 last_return=-251.9 (+1 eps) [worker 1] episodes_seen=1190 last_return=-126.5 (+1 eps) [worker 2] episodes_seen=1190 last_return=-247.6 (+1 eps) [worker 0] episodes_seen=1200 last_return=-171.7 (+1 eps) [worker 3] episodes_seen=1190 last_return=-299.6 (+1 eps) [worker 1] episodes_seen=1200 last_return=-150.0 (+1 eps) [worker 0] episodes_seen=1210 last_return=-344.6 (+1 eps) [worker 2] episodes_seen=1200 last_return=-360.8 (+1 eps) [worker 3] episodes_seen=1200 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1210 last_return=-209.2 (+1 eps) [worker 0] episodes_seen=1220 last_return=-282.7 (+1 eps) [worker 2] episodes_seen=1210 last_return=-63.1 (+1 eps) [worker 3] episodes_seen=1210 last_return=-317.7 (+1 eps) [worker 1] episodes_seen=1220 last_return=-465.8 (+1 eps) [worker 0] episodes_seen=1230 last_return=0.4 (+1 eps) [worker 2] episodes_seen=1220 last_return=-310.3 (+1 eps) [worker 3] episodes_seen=1220 last_return=-120.0 (+1 eps) [worker 1] episodes_seen=1230 last_return=-117.7 (+1 eps) [worker 0] episodes_seen=1240 last_return=-46.3 (+1 eps) [worker 2] episodes_seen=1230 last_return=-131.0 (+1 eps) [worker 3] episodes_seen=1230 last_return=-117.4 (+1 eps) [worker 1] episodes_seen=1240 last_return=-75.4 (+1 eps) [worker 0] episodes_seen=1250 last_return=-109.5 (+1 eps) [worker 2] episodes_seen=1240 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=1240 last_return=-139.5 (+1 eps) [worker 1] episodes_seen=1250 last_return=-147.3 (+1 eps) [worker 0] episodes_seen=1260 last_return=-102.7 (+1 eps) [A2C][sync] it= 3007 steps= 360840 (+120) avg10=-136.77 loss=1061.206 pg=-0.071 vf=2122.585 H=0.514 gn=6030.666 [worker 2] episodes_seen=1250 last_return=-92.2 (+1 eps) [worker 3] episodes_seen=1250 last_return=-132.3 (+1 eps) [worker 1] episodes_seen=1260 last_return=-189.7 (+1 eps) [worker 2] episodes_seen=1260 last_return=-76.0 (+1 eps) [worker 0] episodes_seen=1270 last_return=-155.8 (+1 eps) [worker 3] 
episodes_seen=1260 last_return=-82.3 (+1 eps) [worker 1] episodes_seen=1270 last_return=-167.4 (+1 eps) [worker 2] episodes_seen=1270 last_return=-112.8 (+1 eps) [worker 0] episodes_seen=1280 last_return=-182.5 (+1 eps) [worker 3] episodes_seen=1270 last_return=-102.6 (+1 eps) [worker 1] episodes_seen=1280 last_return=-53.5 (+1 eps) [worker 2] episodes_seen=1280 last_return=-88.8 (+1 eps) [worker 0] episodes_seen=1290 last_return=-16.1 (+1 eps) [worker 3] episodes_seen=1280 last_return=-34.5 (+1 eps) [worker 1] episodes_seen=1290 last_return=-283.1 (+1 eps) [worker 2] episodes_seen=1290 last_return=-119.0 (+1 eps) [worker 0] episodes_seen=1300 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=1290 last_return=-90.6 (+1 eps) [worker 1] episodes_seen=1300 last_return=-91.1 (+1 eps) [worker 2] episodes_seen=1300 last_return=-103.5 (+1 eps) [worker 0] episodes_seen=1310 last_return=-12.2 (+1 eps) [worker 3] episodes_seen=1300 last_return=-95.9 (+1 eps) [worker 1] episodes_seen=1310 last_return=-83.6 (+1 eps) [worker 2] episodes_seen=1310 last_return=-26.8 (+1 eps) [worker 0] episodes_seen=1320 last_return=-119.7 (+1 eps) [worker 3] episodes_seen=1310 last_return=-73.9 (+1 eps) [worker 1] episodes_seen=1320 last_return=-151.5 (+1 eps) [worker 2] episodes_seen=1320 last_return=-205.2 (+1 eps) [worker 0] episodes_seen=1330 last_return=-130.1 (+1 eps) [worker 3] episodes_seen=1320 last_return=-129.0 (+1 eps) [worker 1] episodes_seen=1330 last_return=-116.8 (+1 eps) [worker 2] episodes_seen=1330 last_return=-331.2 (+1 eps) [worker 0] episodes_seen=1340 last_return=-187.0 (+1 eps) [worker 3] episodes_seen=1330 last_return=-142.9 (+1 eps) [worker 1] episodes_seen=1340 last_return=-197.7 (+1 eps) [worker 2] episodes_seen=1340 last_return=-231.0 (+1 eps) [worker 0] episodes_seen=1350 last_return=-154.7 (+1 eps) [worker 3] episodes_seen=1340 last_return=-106.2 (+1 eps) [worker 1] episodes_seen=1350 last_return=-146.1 (+1 eps) [worker 2] episodes_seen=1350 last_return=-369.4 
(+1 eps) [worker 0] episodes_seen=1360 last_return=-360.9 (+1 eps) [worker 3] episodes_seen=1350 last_return=-242.5 (+1 eps) [worker 1] episodes_seen=1360 last_return=-171.7 (+1 eps) [worker 2] episodes_seen=1360 last_return=-147.0 (+1 eps) [worker 0] episodes_seen=1370 last_return=-176.8 (+1 eps) [worker 3] episodes_seen=1360 last_return=-153.6 (+1 eps) [worker 1] episodes_seen=1370 last_return=-399.6 (+1 eps) [worker 2] episodes_seen=1370 last_return=-176.8 (+1 eps) [worker 0] episodes_seen=1380 last_return=-365.2 (+1 eps) [worker 3] episodes_seen=1370 last_return=-468.4 (+1 eps) [worker 1] episodes_seen=1380 last_return=-289.7 (+1 eps) [worker 2] episodes_seen=1380 last_return=-175.8 (+1 eps) [worker 0] episodes_seen=1390 last_return=-160.1 (+1 eps) [worker 3] episodes_seen=1380 last_return=-214.1 (+1 eps) [worker 1] episodes_seen=1390 last_return=-244.7 (+1 eps) [worker 2] episodes_seen=1390 last_return=-158.5 (+1 eps) [worker 0] episodes_seen=1400 last_return=-175.6 (+1 eps) [worker 3] episodes_seen=1390 last_return=-432.1 (+1 eps) [worker 1] episodes_seen=1400 last_return=-100.0 (+1 eps) [A2C][sync] it= 3341 steps= 400920 (+120) avg10=-302.81 loss=82.410 pg=0.001 vf=164.819 H=0.011 gn=473.781 [worker 2] episodes_seen=1400 last_return=-97.0 (+1 eps) [worker 3] episodes_seen=1400 last_return=-552.5 (+1 eps) [worker 0] episodes_seen=1410 last_return=-165.7 (+1 eps) [worker 1] episodes_seen=1410 last_return=-276.5 (+1 eps) [worker 2] episodes_seen=1410 last_return=-366.7 (+1 eps) [worker 0] episodes_seen=1420 last_return=-178.9 (+1 eps) [worker 3] episodes_seen=1410 last_return=-97.9 (+1 eps) [worker 1] episodes_seen=1420 last_return=-410.3 (+1 eps) [worker 2] episodes_seen=1420 last_return=-211.3 (+1 eps) [worker 0] episodes_seen=1430 last_return=-147.6 (+1 eps) [worker 3] episodes_seen=1420 last_return=-146.5 (+1 eps) [worker 1] episodes_seen=1430 last_return=-131.9 (+1 eps) [worker 2] episodes_seen=1430 last_return=-208.4 (+1 eps) [worker 0] episodes_seen=1440 
last_return=-144.7 (+1 eps) [worker 3] episodes_seen=1430 last_return=-229.6 (+1 eps) [worker 1] episodes_seen=1440 last_return=-131.3 (+1 eps) [worker 2] episodes_seen=1440 last_return=-65.9 (+1 eps) [worker 0] episodes_seen=1450 last_return=-123.2 (+1 eps) [worker 3] episodes_seen=1440 last_return=-104.4 (+1 eps) [worker 1] episodes_seen=1450 last_return=-82.6 (+1 eps) [worker 2] episodes_seen=1450 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=1450 last_return=64.8 (+1 eps) [worker 1] episodes_seen=1460 last_return=-118.7 (+1 eps) [worker 0] episodes_seen=1460 last_return=-42.6 (+1 eps) [worker 2] episodes_seen=1460 last_return=-127.1 (+1 eps) [worker 3] episodes_seen=1460 last_return=-239.9 (+1 eps) [worker 1] episodes_seen=1470 last_return=-334.9 (+1 eps) [worker 0] episodes_seen=1470 last_return=-245.7 (+1 eps) [worker 2] episodes_seen=1470 last_return=-163.4 (+1 eps) [worker 3] episodes_seen=1470 last_return=-273.6 (+1 eps) [worker 0] episodes_seen=1480 last_return=-199.5 (+1 eps) [worker 1] episodes_seen=1480 last_return=-235.0 (+1 eps) [worker 2] episodes_seen=1480 last_return=-176.1 (+1 eps) [worker 3] episodes_seen=1480 last_return=-136.4 (+1 eps) [worker 0] episodes_seen=1490 last_return=-79.3 (+1 eps) [worker 1] episodes_seen=1490 last_return=-108.1 (+1 eps) [worker 2] episodes_seen=1490 last_return=-134.3 (+1 eps) [worker 3] episodes_seen=1490 last_return=-67.9 (+1 eps) [worker 1] episodes_seen=1500 last_return=-75.6 (+1 eps) [worker 0] episodes_seen=1500 last_return=-55.8 (+1 eps) [worker 2] episodes_seen=1500 last_return=-74.4 (+1 eps) [worker 3] episodes_seen=1500 last_return=93.3 (+1 eps) [worker 1] episodes_seen=1510 last_return=-150.2 (+1 eps) [worker 0] episodes_seen=1510 last_return=-107.0 (+1 eps) [worker 2] episodes_seen=1510 last_return=-76.1 (+1 eps) [worker 3] episodes_seen=1510 last_return=-87.9 (+1 eps) [worker 1] episodes_seen=1520 last_return=-131.3 (+1 eps) [worker 0] episodes_seen=1520 last_return=-226.8 (+1 eps) [worker 2] 
episodes_seen=1520 last_return=-259.8 (+1 eps) [worker 3] episodes_seen=1520 last_return=-98.7 (+1 eps) [worker 1] episodes_seen=1530 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1530 last_return=-112.2 (+1 eps) [worker 2] episodes_seen=1530 last_return=-108.7 (+1 eps) [worker 3] episodes_seen=1530 last_return=-66.5 (+1 eps) [worker 1] episodes_seen=1540 last_return=-89.4 (+1 eps) [worker 0] episodes_seen=1540 last_return=-102.9 (+1 eps) [worker 2] episodes_seen=1540 last_return=-100.8 (+1 eps) [A2C][sync] it= 3675 steps= 441000 (+120) avg10=-124.59 loss=1164.294 pg=0.149 vf=2328.316 H=0.419 gn=3821.058 [worker 3] episodes_seen=1540 last_return=-91.3 (+1 eps) [worker 1] episodes_seen=1550 last_return=-73.6 (+1 eps) [worker 0] episodes_seen=1550 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1550 last_return=-107.7 (+1 eps) [worker 3] episodes_seen=1550 last_return=-106.2 (+1 eps) [worker 0] episodes_seen=1560 last_return=-124.1 (+1 eps) [worker 1] episodes_seen=1560 last_return=-114.6 (+1 eps) [worker 2] episodes_seen=1560 last_return=-90.6 (+1 eps) [worker 0] episodes_seen=1570 last_return=-85.0 (+1 eps) [worker 3] episodes_seen=1560 last_return=-37.5 (+1 eps) [worker 1] episodes_seen=1570 last_return=-69.0 (+1 eps) [worker 2] episodes_seen=1570 last_return=-101.0 (+1 eps) [worker 1] episodes_seen=1580 last_return=-73.1 (+1 eps) [worker 0] episodes_seen=1580 last_return=-107.8 (+1 eps) [worker 3] episodes_seen=1570 last_return=-94.2 (+1 eps) [worker 2] episodes_seen=1580 last_return=-92.8 (+1 eps) [worker 1] episodes_seen=1590 last_return=-108.7 (+1 eps) [worker 3] episodes_seen=1580 last_return=-84.9 (+1 eps) [worker 0] episodes_seen=1590 last_return=-153.4 (+1 eps) [worker 2] episodes_seen=1590 last_return=-77.0 (+1 eps) [worker 1] episodes_seen=1600 last_return=-313.6 (+1 eps) [worker 3] episodes_seen=1590 last_return=-243.5 (+1 eps) [worker 0] episodes_seen=1600 last_return=-244.3 (+1 eps) [worker 2] episodes_seen=1600 last_return=-250.3 (+1 eps) 
[worker 3] episodes_seen=1600 last_return=-261.6 (+1 eps) [worker 1] episodes_seen=1610 last_return=-140.8 (+1 eps) [worker 0] episodes_seen=1610 last_return=-140.9 (+1 eps) [worker 2] episodes_seen=1610 last_return=-148.2 (+1 eps) [worker 3] episodes_seen=1610 last_return=-104.7 (+1 eps) [worker 1] episodes_seen=1620 last_return=-107.5 (+1 eps) [worker 0] episodes_seen=1620 last_return=-95.8 (+1 eps) [worker 2] episodes_seen=1620 last_return=-94.8 (+1 eps) [worker 3] episodes_seen=1620 last_return=-96.0 (+1 eps) [worker 1] episodes_seen=1630 last_return=-99.5 (+1 eps) [worker 0] episodes_seen=1630 last_return=-95.7 (+1 eps) [worker 2] episodes_seen=1630 last_return=-94.5 (+1 eps) [worker 3] episodes_seen=1630 last_return=-99.2 (+1 eps) [worker 0] episodes_seen=1640 last_return=-73.8 (+1 eps) [worker 1] episodes_seen=1640 last_return=-82.4 (+1 eps) [worker 2] episodes_seen=1640 last_return=-100.9 (+1 eps) [worker 3] episodes_seen=1640 last_return=-91.4 (+1 eps) [worker 0] episodes_seen=1650 last_return=-110.9 (+1 eps) [worker 2] episodes_seen=1650 last_return=-185.1 (+1 eps) [worker 1] episodes_seen=1650 last_return=-81.3 (+1 eps) [worker 3] episodes_seen=1650 last_return=-35.4 (+1 eps) [worker 0] episodes_seen=1660 last_return=-74.1 (+1 eps) [worker 2] episodes_seen=1660 last_return=-107.3 (+1 eps) [worker 1] episodes_seen=1660 last_return=-85.9 (+1 eps) [worker 3] episodes_seen=1660 last_return=-74.5 (+1 eps) [worker 0] episodes_seen=1670 last_return=-107.6 (+1 eps) [worker 2] episodes_seen=1670 last_return=-94.1 (+1 eps) [worker 1] episodes_seen=1670 last_return=-115.3 (+1 eps) [worker 3] episodes_seen=1670 last_return=-92.6 (+1 eps) [worker 0] episodes_seen=1680 last_return=-57.0 (+1 eps) [worker 2] episodes_seen=1680 last_return=-77.5 (+1 eps) [worker 1] episodes_seen=1680 last_return=-105.5 (+1 eps) [worker 3] episodes_seen=1680 last_return=-80.5 (+1 eps) [A2C][sync] it= 4009 steps= 481080 (+120) avg10= -67.20 loss=230.377 pg=-0.156 vf=461.121 H=0.913 
gn=1850.176 [worker 0] episodes_seen=1690 last_return=-89.4 (+1 eps) [worker 2] episodes_seen=1690 last_return=97.4 (+1 eps) [worker 1] episodes_seen=1690 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1700 last_return=-96.9 (+1 eps) [worker 3] episodes_seen=1690 last_return=-93.5 (+1 eps) [worker 2] episodes_seen=1700 last_return=-68.5 (+1 eps) [worker 1] episodes_seen=1700 last_return=-73.3 (+1 eps) [worker 0] episodes_seen=1710 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=1700 last_return=-74.6 (+1 eps) [worker 2] episodes_seen=1710 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1710 last_return=-92.3 (+1 eps) [worker 0] episodes_seen=1720 last_return=-81.4 (+1 eps) [worker 3] episodes_seen=1710 last_return=-112.2 (+1 eps) [worker 2] episodes_seen=1720 last_return=-54.0 (+1 eps) [worker 1] episodes_seen=1720 last_return=-123.7 (+1 eps) [worker 0] episodes_seen=1730 last_return=-58.7 (+1 eps) [worker 3] episodes_seen=1720 last_return=-40.0 (+1 eps) [worker 2] episodes_seen=1730 last_return=-91.6 (+1 eps) [worker 1] episodes_seen=1730 last_return=-99.6 (+1 eps) [worker 0] episodes_seen=1740 last_return=-127.0 (+1 eps) [worker 3] episodes_seen=1730 last_return=-106.1 (+1 eps) [worker 1] episodes_seen=1740 last_return=-102.7 (+1 eps) [worker 2] episodes_seen=1740 last_return=-84.8 (+1 eps) [worker 0] episodes_seen=1750 last_return=-149.9 (+1 eps) [worker 3] episodes_seen=1740 last_return=-79.4 (+1 eps) [worker 2] episodes_seen=1750 last_return=-74.7 (+1 eps) [worker 1] episodes_seen=1750 last_return=-68.3 (+1 eps) [worker 0] episodes_seen=1760 last_return=9.8 (+1 eps) [worker 3] episodes_seen=1750 last_return=-77.7 (+1 eps) [worker 2] episodes_seen=1760 last_return=-79.3 (+1 eps) [worker 1] episodes_seen=1760 last_return=-78.0 (+1 eps) [worker 3] episodes_seen=1760 last_return=-144.5 (+1 eps) [worker 0] episodes_seen=1770 last_return=-30.3 (+1 eps) [worker 2] episodes_seen=1770 last_return=-95.5 (+1 eps) [worker 1] episodes_seen=1770 
last_return=-84.2 (+1 eps) [worker 0] episodes_seen=1780 last_return=-134.9 (+1 eps) [worker 3] episodes_seen=1770 last_return=-148.0 (+1 eps) [worker 1] episodes_seen=1780 last_return=-108.7 (+1 eps) [worker 2] episodes_seen=1780 last_return=-93.2 (+1 eps) [worker 0] episodes_seen=1790 last_return=-58.1 (+1 eps) [worker 3] episodes_seen=1780 last_return=-83.6 (+1 eps) [worker 1] episodes_seen=1790 last_return=-104.3 (+1 eps) [worker 2] episodes_seen=1790 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1800 last_return=-80.1 (+1 eps) [worker 3] episodes_seen=1790 last_return=-101.6 (+1 eps) [worker 1] episodes_seen=1800 last_return=-117.5 (+1 eps) [worker 2] episodes_seen=1800 last_return=-95.9 (+1 eps) [worker 0] episodes_seen=1810 last_return=-46.3 (+1 eps) [worker 1] episodes_seen=1810 last_return=-90.4 (+1 eps) [worker 3] episodes_seen=1800 last_return=-74.1 (+1 eps) [worker 2] episodes_seen=1810 last_return=-101.4 (+1 eps) [worker 0] episodes_seen=1820 last_return=-91.5 (+1 eps) [worker 1] episodes_seen=1820 last_return=117.4 (+1 eps) [worker 2] episodes_seen=1820 last_return=-94.6 (+1 eps) [worker 3] episodes_seen=1810 last_return=-117.1 (+1 eps) [worker 0] episodes_seen=1830 last_return=-93.9 (+1 eps) [A2C][sync] it= 4343 steps= 521160 (+120) avg10= -93.94 loss=52.247 pg=-0.080 vf=104.705 H=0.840 gn=427.092 [worker 1] episodes_seen=1830 last_return=-106.5 (+1 eps) [worker 2] episodes_seen=1830 last_return=-93.6 (+1 eps) [worker 3] episodes_seen=1820 last_return=-74.0 (+1 eps) [worker 0] episodes_seen=1840 last_return=-102.4 (+1 eps) [worker 1] episodes_seen=1840 last_return=-70.5 (+1 eps) [worker 2] episodes_seen=1840 last_return=-67.3 (+1 eps) [worker 3] episodes_seen=1830 last_return=-89.9 (+1 eps) [worker 0] episodes_seen=1850 last_return=-77.8 (+1 eps) [worker 1] episodes_seen=1850 last_return=-83.2 (+1 eps) [worker 2] episodes_seen=1850 last_return=-82.0 (+1 eps) [worker 3] episodes_seen=1840 last_return=-102.7 (+1 eps) [worker 0] 
episodes_seen=1860 last_return=-84.1 (+1 eps) [worker 1] episodes_seen=1860 last_return=-104.6 (+1 eps) [worker 2] episodes_seen=1860 last_return=-74.6 (+1 eps) [worker 3] episodes_seen=1850 last_return=-106.2 (+1 eps) [worker 0] episodes_seen=1870 last_return=-95.9 (+1 eps) [worker 1] episodes_seen=1870 last_return=-92.3 (+1 eps) [worker 2] episodes_seen=1870 last_return=-79.0 (+1 eps) [worker 3] episodes_seen=1860 last_return=-90.1 (+1 eps) [worker 0] episodes_seen=1880 last_return=-80.7 (+1 eps) [worker 1] episodes_seen=1880 last_return=-100.5 (+1 eps) [worker 2] episodes_seen=1880 last_return=-72.4 (+1 eps) [worker 3] episodes_seen=1870 last_return=-106.9 (+1 eps) [worker 0] episodes_seen=1890 last_return=-111.9 (+1 eps) [worker 1] episodes_seen=1890 last_return=-67.2 (+1 eps) [worker 2] episodes_seen=1890 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=1880 last_return=-80.0 (+1 eps) [worker 0] episodes_seen=1900 last_return=-102.9 (+1 eps) [worker 1] episodes_seen=1900 last_return=-73.2 (+1 eps) [worker 3] episodes_seen=1890 last_return=12.2 (+1 eps) [worker 2] episodes_seen=1900 last_return=-62.8 (+1 eps) [worker 0] episodes_seen=1910 last_return=-82.3 (+1 eps) [worker 1] episodes_seen=1910 last_return=-93.6 (+1 eps) [worker 3] episodes_seen=1900 last_return=-68.7 (+1 eps) [worker 2] episodes_seen=1910 last_return=-110.1 (+1 eps) [worker 0] episodes_seen=1920 last_return=-75.1 (+1 eps) [worker 1] episodes_seen=1920 last_return=-141.9 (+1 eps) [worker 3] episodes_seen=1910 last_return=-136.0 (+1 eps) [worker 2] episodes_seen=1920 last_return=-100.5 (+1 eps) [worker 0] episodes_seen=1930 last_return=-87.7 (+1 eps) [worker 1] episodes_seen=1930 last_return=-75.7 (+1 eps) [worker 3] episodes_seen=1920 last_return=-102.9 (+1 eps) [worker 2] episodes_seen=1930 last_return=-62.6 (+1 eps) [worker 0] episodes_seen=1940 last_return=-84.1 (+1 eps) [worker 1] episodes_seen=1940 last_return=-105.1 (+1 eps) [worker 3] episodes_seen=1930 last_return=-92.4 (+1 eps) 
[worker 2] episodes_seen=1940 last_return=-109.2 (+1 eps) [worker 0] episodes_seen=1950 last_return=-89.8 (+1 eps) [worker 1] episodes_seen=1950 last_return=-96.8 (+1 eps) [worker 3] episodes_seen=1940 last_return=-85.2 (+1 eps) [worker 2] episodes_seen=1950 last_return=-68.2 (+1 eps) [worker 0] episodes_seen=1960 last_return=-90.7 (+1 eps) [worker 1] episodes_seen=1960 last_return=-83.9 (+1 eps) [worker 3] episodes_seen=1950 last_return=-88.9 (+1 eps) [worker 2] episodes_seen=1960 last_return=-83.8 (+1 eps) [worker 0] episodes_seen=1970 last_return=-115.8 (+1 eps) [worker 1] episodes_seen=1970 last_return=-97.0 (+1 eps) [worker 3] episodes_seen=1960 last_return=-68.1 (+1 eps) [A2C][sync] it= 4677 steps= 561240 (+120) avg10=-135.69 loss=796.754 pg=0.318 vf=1592.915 H=0.722 gn=4011.427 [worker 2] episodes_seen=1970 last_return=-206.0 (+1 eps) [worker 0] episodes_seen=1980 last_return=-188.7 (+1 eps) [worker 1] episodes_seen=1980 last_return=-216.1 (+1 eps) [worker 3] episodes_seen=1970 last_return=88.5 (+1 eps) [worker 2] episodes_seen=1980 last_return=-107.8 (+1 eps) [worker 0] episodes_seen=1990 last_return=-82.4 (+1 eps) [worker 1] episodes_seen=1990 last_return=-125.3 (+1 eps) [worker 3] episodes_seen=1980 last_return=-70.4 (+1 eps) [worker 2] episodes_seen=1990 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2000 last_return=-108.0 (+1 eps) [worker 1] episodes_seen=2000 last_return=-91.1 (+1 eps) [worker 3] episodes_seen=1990 last_return=-90.0 (+1 eps) [worker 2] episodes_seen=2000 last_return=-115.9 (+1 eps) [worker 0] episodes_seen=2010 last_return=-100.8 (+1 eps) [worker 1] episodes_seen=2010 last_return=-101.6 (+1 eps) [worker 3] episodes_seen=2000 last_return=-91.9 (+1 eps) [worker 2] episodes_seen=2010 last_return=-98.9 (+1 eps) [worker 0] episodes_seen=2020 last_return=-86.5 (+1 eps) [worker 1] episodes_seen=2020 last_return=-94.9 (+1 eps) [worker 3] episodes_seen=2010 last_return=-84.2 (+1 eps) [worker 2] episodes_seen=2020 last_return=-88.9 (+1 
eps) [worker 0] episodes_seen=2030 last_return=-63.0 (+1 eps) [worker 1] episodes_seen=2030 last_return=-92.6 (+1 eps) [worker 3] episodes_seen=2020 last_return=-66.4 (+1 eps) [worker 2] episodes_seen=2030 last_return=-108.4 (+1 eps) [worker 0] episodes_seen=2040 last_return=-107.3 (+1 eps) [worker 1] episodes_seen=2040 last_return=-89.2 (+1 eps) [worker 3] episodes_seen=2030 last_return=-57.8 (+1 eps) [worker 2] episodes_seen=2040 last_return=-92.9 (+1 eps) [worker 0] episodes_seen=2050 last_return=-82.4 (+1 eps) [worker 1] episodes_seen=2050 last_return=-112.0 (+1 eps) [worker 3] episodes_seen=2040 last_return=-97.3 (+1 eps) [worker 2] episodes_seen=2050 last_return=-108.3 (+1 eps) [worker 0] episodes_seen=2060 last_return=-88.2 (+1 eps) [worker 1] episodes_seen=2060 last_return=-93.8 (+1 eps) [worker 3] episodes_seen=2050 last_return=-126.6 (+1 eps) [worker 2] episodes_seen=2060 last_return=-85.9 (+1 eps) [worker 0] episodes_seen=2070 last_return=-68.3 (+1 eps) [worker 3] episodes_seen=2060 last_return=-105.2 (+1 eps) [worker 1] episodes_seen=2070 last_return=-74.5 (+1 eps) [worker 2] episodes_seen=2070 last_return=-110.6 (+1 eps) [worker 0] episodes_seen=2080 last_return=-97.3 (+1 eps) [worker 1] episodes_seen=2080 last_return=-61.3 (+1 eps) [worker 3] episodes_seen=2070 last_return=-106.7 (+1 eps) [worker 2] episodes_seen=2080 last_return=-84.8 (+1 eps) [worker 0] episodes_seen=2090 last_return=-113.7 (+1 eps) [worker 3] episodes_seen=2080 last_return=-100.8 (+1 eps) [worker 1] episodes_seen=2090 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=2090 last_return=-52.7 (+1 eps) [worker 0] episodes_seen=2100 last_return=-112.1 (+1 eps) [worker 3] episodes_seen=2090 last_return=-90.8 (+1 eps) [worker 1] episodes_seen=2100 last_return=-75.4 (+1 eps) [worker 2] episodes_seen=2100 last_return=-56.9 (+1 eps) [worker 0] episodes_seen=2110 last_return=-74.8 (+1 eps) [worker 1] episodes_seen=2110 last_return=-95.6 (+1 eps) [worker 3] episodes_seen=2100 
last_return=-106.6 (+1 eps) [worker 2] episodes_seen=2110 last_return=-64.6 (+1 eps) [worker 0] episodes_seen=2120 last_return=-79.5 (+1 eps) [A2C][sync] it= 5011 steps= 601320 (+120) avg10= -80.16 loss=34.649 pg=-0.086 vf=69.502 H=0.543 gn=683.886 [worker 1] episodes_seen=2120 last_return=-137.3 (+1 eps) [worker 3] episodes_seen=2110 last_return=-87.2 (+1 eps) [worker 2] episodes_seen=2120 last_return=-106.3 (+1 eps) [worker 0] episodes_seen=2130 last_return=-91.0 (+1 eps) [worker 1] episodes_seen=2130 last_return=-97.1 (+1 eps) [worker 3] episodes_seen=2120 last_return=-67.9 (+1 eps) [worker 2] episodes_seen=2130 last_return=-189.3 (+1 eps) [worker 0] episodes_seen=2140 last_return=-88.5 (+1 eps) [worker 1] episodes_seen=2140 last_return=-98.5 (+1 eps) [worker 3] episodes_seen=2130 last_return=-94.7 (+1 eps) [worker 2] episodes_seen=2140 last_return=-122.8 (+1 eps) [worker 0] episodes_seen=2150 last_return=-111.6 (+1 eps) [worker 1] episodes_seen=2150 last_return=-115.5 (+1 eps) [worker 3] episodes_seen=2140 last_return=-100.6 (+1 eps) [worker 2] episodes_seen=2150 last_return=-79.9 (+1 eps) [worker 0] episodes_seen=2160 last_return=-89.7 (+1 eps) [worker 1] episodes_seen=2160 last_return=-90.3 (+1 eps) [worker 2] episodes_seen=2160 last_return=-88.8 (+1 eps) [worker 3] episodes_seen=2150 last_return=-83.6 (+1 eps) [worker 0] episodes_seen=2170 last_return=-107.8 (+1 eps) [worker 1] episodes_seen=2170 last_return=-85.7 (+1 eps) [worker 2] episodes_seen=2170 last_return=-92.3 (+1 eps) [worker 3] episodes_seen=2160 last_return=-88.3 (+1 eps) [worker 0] episodes_seen=2180 last_return=-87.1 (+1 eps) [worker 1] episodes_seen=2180 last_return=-63.4 (+1 eps) [worker 2] episodes_seen=2180 last_return=-108.9 (+1 eps) [worker 3] episodes_seen=2170 last_return=-94.3 (+1 eps) [worker 0] episodes_seen=2190 last_return=-177.6 (+1 eps) [worker 1] episodes_seen=2190 last_return=-92.3 (+1 eps) [worker 2] episodes_seen=2190 last_return=-107.4 (+1 eps) [worker 3] episodes_seen=2180 
last_return=-130.3 (+1 eps) [worker 0] episodes_seen=2200 last_return=-85.4 (+1 eps) [worker 1] episodes_seen=2200 last_return=-81.0 (+1 eps) [worker 2] episodes_seen=2200 last_return=-98.6 (+1 eps) [worker 3] episodes_seen=2190 last_return=-84.5 (+1 eps) [worker 0] episodes_seen=2210 last_return=-221.0 (+1 eps) [worker 3] episodes_seen=2200 last_return=-152.5 (+1 eps) [worker 2] episodes_seen=2210 last_return=-119.7 (+1 eps) [worker 1] episodes_seen=2210 last_return=-82.7 (+1 eps) [worker 0] episodes_seen=2220 last_return=-85.2 (+1 eps) [worker 2] episodes_seen=2220 last_return=-92.4 (+1 eps) [worker 1] episodes_seen=2220 last_return=-67.9 (+1 eps) [worker 3] episodes_seen=2210 last_return=-74.2 (+1 eps) [worker 0] episodes_seen=2230 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=2230 last_return=-96.8 (+1 eps) [worker 1] episodes_seen=2230 last_return=-95.8 (+1 eps) [worker 3] episodes_seen=2220 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2240 last_return=26.3 (+1 eps) [worker 3] episodes_seen=2230 last_return=-91.4 (+1 eps) [worker 1] episodes_seen=2240 last_return=-98.0 (+1 eps) [worker 2] episodes_seen=2240 last_return=-104.8 (+1 eps) [worker 0] episodes_seen=2250 last_return=-80.3 (+1 eps) [worker 2] episodes_seen=2250 last_return=-74.2 (+1 eps) [worker 1] episodes_seen=2250 last_return=-79.6 (+1 eps) [worker 3] episodes_seen=2240 last_return=-90.9 (+1 eps) [worker 0] episodes_seen=2260 last_return=-94.8 (+1 eps) [worker 1] episodes_seen=2260 last_return=-82.7 (+1 eps) [A2C][sync] it= 5345 steps= 641400 (+120) avg10= -90.99 loss=17.269 pg=-0.030 vf=34.654 H=0.938 gn=135.069 [worker 2] episodes_seen=2260 last_return=-102.5 (+1 eps) [worker 3] episodes_seen=2250 last_return=-101.5 (+1 eps) [worker 0] episodes_seen=2270 last_return=-92.3 (+1 eps) [worker 1] episodes_seen=2270 last_return=-138.4 (+1 eps) [worker 2] episodes_seen=2270 last_return=-94.3 (+1 eps) [worker 3] episodes_seen=2260 last_return=-120.3 (+1 eps) [worker 0] episodes_seen=2280 
last_return=-104.1 (+1 eps) [worker 1] episodes_seen=2280 last_return=-93.0 (+1 eps) [worker 2] episodes_seen=2280 last_return=-92.0 (+1 eps) [worker 3] episodes_seen=2270 last_return=-103.0 (+1 eps) [worker 0] episodes_seen=2290 last_return=-103.9 (+1 eps) [worker 1] episodes_seen=2290 last_return=-100.3 (+1 eps) [worker 2] episodes_seen=2290 last_return=-100.9 (+1 eps) [worker 3] episodes_seen=2280 last_return=-72.7 (+1 eps) [worker 0] episodes_seen=2300 last_return=-97.2 (+1 eps) [worker 1] episodes_seen=2300 last_return=-82.5 (+1 eps) [worker 2] episodes_seen=2300 last_return=-87.1 (+1 eps) [worker 3] episodes_seen=2290 last_return=-93.0 (+1 eps) [worker 0] episodes_seen=2310 last_return=-95.1 (+1 eps) [worker 1] episodes_seen=2310 last_return=-103.6 (+1 eps) [worker 2] episodes_seen=2310 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=2300 last_return=-99.9 (+1 eps) [worker 0] episodes_seen=2320 last_return=-87.9 (+1 eps) [worker 1] episodes_seen=2320 last_return=-99.1 (+1 eps) [worker 3] episodes_seen=2310 last_return=-101.3 (+1 eps) [worker 2] episodes_seen=2320 last_return=-111.4 (+1 eps) [worker 0] episodes_seen=2330 last_return=-99.9 (+1 eps) [worker 1] episodes_seen=2330 last_return=-90.4 (+1 eps) [worker 2] episodes_seen=2330 last_return=-82.3 (+1 eps) [worker 3] episodes_seen=2320 last_return=-81.0 (+1 eps) [worker 0] episodes_seen=2340 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=2340 last_return=-88.0 (+1 eps) [worker 1] episodes_seen=2340 last_return=-117.7 (+1 eps) [worker 3] episodes_seen=2330 last_return=-103.5 (+1 eps) [worker 0] episodes_seen=2350 last_return=-113.2 (+1 eps) [worker 1] episodes_seen=2350 last_return=-82.5 (+1 eps) [worker 2] episodes_seen=2350 last_return=-100.6 (+1 eps) [worker 3] episodes_seen=2340 last_return=-74.0 (+1 eps) [worker 0] episodes_seen=2360 last_return=-95.3 (+1 eps) [worker 1] episodes_seen=2360 last_return=-99.4 (+1 eps) [worker 2] episodes_seen=2360 last_return=-104.8 (+1 eps) [worker 3] 
episodes_seen=2350 last_return=-105.7 (+1 eps) [worker 0] episodes_seen=2370 last_return=-105.3 (+1 eps) [worker 1] episodes_seen=2370 last_return=-96.6 (+1 eps) [worker 2] episodes_seen=2370 last_return=-93.7 (+1 eps) [worker 3] episodes_seen=2360 last_return=-76.1 (+1 eps) [worker 0] episodes_seen=2380 last_return=-132.1 (+1 eps) [worker 1] episodes_seen=2380 last_return=-121.2 (+1 eps) [worker 2] episodes_seen=2380 last_return=-99.2 (+1 eps) [worker 3] episodes_seen=2370 last_return=-101.1 (+1 eps) [worker 0] episodes_seen=2390 last_return=-87.2 (+1 eps) [worker 1] episodes_seen=2390 last_return=-126.8 (+1 eps) [worker 3] episodes_seen=2380 last_return=-92.6 (+1 eps) [worker 2] episodes_seen=2390 last_return=-102.7 (+1 eps) [worker 0] episodes_seen=2400 last_return=-117.6 (+1 eps) [worker 3] episodes_seen=2390 last_return=-72.6 (+1 eps) [worker 1] episodes_seen=2400 last_return=-114.1 (+1 eps) [worker 2] episodes_seen=2400 last_return=-99.9 (+1 eps) [A2C][sync] it= 5679 steps= 681480 (+120) avg10= -88.18 loss=1472.685 pg=0.051 vf=2945.312 H=0.721 gn=1555.733 [worker 0] episodes_seen=2410 last_return=-121.2 (+1 eps) [worker 1] episodes_seen=2410 last_return=-66.0 (+1 eps) [worker 3] episodes_seen=2400 last_return=-60.8 (+1 eps) [worker 2] episodes_seen=2410 last_return=-74.3 (+1 eps) [worker 0] episodes_seen=2420 last_return=-94.4 (+1 eps) [worker 1] episodes_seen=2420 last_return=-114.4 (+1 eps) [worker 3] episodes_seen=2410 last_return=-70.0 (+1 eps) [worker 2] episodes_seen=2420 last_return=-84.4 (+1 eps) [worker 0] episodes_seen=2430 last_return=-72.4 (+1 eps) [worker 1] episodes_seen=2430 last_return=-100.8 (+1 eps) [worker 3] episodes_seen=2420 last_return=-89.5 (+1 eps) [worker 2] episodes_seen=2430 last_return=-109.3 (+1 eps) [worker 0] episodes_seen=2440 last_return=-70.6 (+1 eps) [worker 3] episodes_seen=2430 last_return=-98.7 (+1 eps) [worker 1] episodes_seen=2440 last_return=-104.3 (+1 eps) [worker 2] episodes_seen=2440 last_return=-80.2 (+1 eps) 
[worker 0] episodes_seen=2450 last_return=-69.9 (+1 eps) [worker 3] episodes_seen=2440 last_return=-93.2 (+1 eps) [worker 1] episodes_seen=2450 last_return=-104.2 (+1 eps) [worker 2] episodes_seen=2450 last_return=-94.1 (+1 eps) [worker 0] episodes_seen=2460 last_return=-102.5 (+1 eps) [worker 1] episodes_seen=2460 last_return=-98.7 (+1 eps) [worker 3] episodes_seen=2450 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=2460 last_return=-84.4 (+1 eps) [worker 0] episodes_seen=2470 last_return=-67.5 (+1 eps) [worker 3] episodes_seen=2460 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=2470 last_return=-72.4 (+1 eps) [worker 1] episodes_seen=2470 last_return=-84.3 (+1 eps) [worker 0] episodes_seen=2480 last_return=-85.6 (+1 eps) [worker 3] episodes_seen=2470 last_return=-95.6 (+1 eps) [worker 2] episodes_seen=2480 last_return=-79.8 (+1 eps) [worker 1] episodes_seen=2480 last_return=-132.4 (+1 eps) [worker 0] episodes_seen=2490 last_return=-84.8 (+1 eps) [worker 3] episodes_seen=2480 last_return=-94.2 (+1 eps) [worker 2] episodes_seen=2490 last_return=-86.1 (+1 eps) [worker 1] episodes_seen=2490 last_return=-83.5 (+1 eps) [worker 0] episodes_seen=2500 last_return=-76.3 (+1 eps) [worker 3] episodes_seen=2490 last_return=-122.0 (+1 eps) [worker 2] episodes_seen=2500 last_return=-86.0 (+1 eps) [worker 1] episodes_seen=2500 last_return=-87.3 (+1 eps) [worker 0] episodes_seen=2510 last_return=-86.6 (+1 eps) [worker 3] episodes_seen=2500 last_return=-102.2 (+1 eps) [worker 2] episodes_seen=2510 last_return=-63.3 (+1 eps) [worker 1] episodes_seen=2510 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2520 last_return=-85.6 (+1 eps) [worker 2] episodes_seen=2520 last_return=-128.9 (+1 eps) [worker 3] episodes_seen=2510 last_return=-98.3 (+1 eps) [worker 1] episodes_seen=2520 last_return=-101.3 (+1 eps) [worker 0] episodes_seen=2530 last_return=-120.7 (+1 eps) [worker 2] episodes_seen=2530 last_return=-102.9 (+1 eps) [worker 3] episodes_seen=2520 last_return=-102.5 
(+1 eps) [worker 1] episodes_seen=2530 last_return=-106.6 (+1 eps) [worker 0] episodes_seen=2540 last_return=-97.5 (+1 eps) [worker 2] episodes_seen=2540 last_return=-87.6 (+1 eps) [worker 3] episodes_seen=2530 last_return=-96.6 (+1 eps) [worker 1] episodes_seen=2540 last_return=-93.2 (+1 eps) [worker 0] episodes_seen=2550 last_return=-122.8 (+1 eps) [A2C][sync] it= 6013 steps= 721560 (+120) avg10=-102.16 loss=23.277 pg=-0.132 vf=46.870 H=0.883 gn=374.504 [worker 2] episodes_seen=2550 last_return=-90.0 (+1 eps) [worker 1] episodes_seen=2550 last_return=-84.1 (+1 eps) [worker 3] episodes_seen=2540 last_return=-110.2 (+1 eps) [worker 0] episodes_seen=2560 last_return=-85.7 (+1 eps) [worker 2] episodes_seen=2560 last_return=-92.5 (+1 eps) [worker 1] episodes_seen=2560 last_return=-95.0 (+1 eps) [worker 0] episodes_seen=2570 last_return=-61.6 (+1 eps) [worker 3] episodes_seen=2550 last_return=-115.6 (+1 eps) [worker 2] episodes_seen=2570 last_return=-90.6 (+1 eps) [worker 1] episodes_seen=2570 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=2560 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2580 last_return=-105.7 (+1 eps) [worker 2] episodes_seen=2580 last_return=-95.4 (+1 eps) [worker 1] episodes_seen=2580 last_return=-75.7 (+1 eps) [worker 3] episodes_seen=2570 last_return=-99.0 (+1 eps) [worker 0] episodes_seen=2590 last_return=-84.1 (+1 eps) [worker 2] episodes_seen=2590 last_return=-109.9 (+1 eps) [worker 1] episodes_seen=2590 last_return=4.3 (+1 eps) [worker 3] episodes_seen=2580 last_return=-104.8 (+1 eps) [worker 0] episodes_seen=2600 last_return=-104.1 (+1 eps) [worker 2] episodes_seen=2600 last_return=-94.7 (+1 eps) [worker 1] episodes_seen=2600 last_return=-80.7 (+1 eps) [worker 3] episodes_seen=2590 last_return=-104.6 (+1 eps) [worker 0] episodes_seen=2610 last_return=-130.6 (+1 eps) [worker 2] episodes_seen=2610 last_return=-110.5 (+1 eps) [worker 1] episodes_seen=2610 last_return=-80.4 (+1 eps) [worker 3] episodes_seen=2600 last_return=-99.2 
(+1 eps) [worker 0] episodes_seen=2620 last_return=-35.3 (+1 eps) [worker 2] episodes_seen=2620 last_return=-103.7 (+1 eps) [worker 1] episodes_seen=2620 last_return=-121.5 (+1 eps) [worker 3] episodes_seen=2610 last_return=-94.0 (+1 eps) [worker 0] episodes_seen=2630 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=2630 last_return=-86.4 (+1 eps) [worker 1] episodes_seen=2630 last_return=-108.3 (+1 eps) [worker 3] episodes_seen=2620 last_return=-85.4 (+1 eps) [worker 0] episodes_seen=2640 last_return=-104.3 (+1 eps) [worker 2] episodes_seen=2640 last_return=-79.0 (+1 eps) [worker 1] episodes_seen=2640 last_return=-110.6 (+1 eps) [worker 3] episodes_seen=2630 last_return=-98.7 (+1 eps) [worker 0] episodes_seen=2650 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=2650 last_return=-89.0 (+1 eps) [worker 1] episodes_seen=2650 last_return=-77.9 (+1 eps) [worker 3] episodes_seen=2640 last_return=-106.2 (+1 eps) [worker 0] episodes_seen=2660 last_return=-80.2 (+1 eps) [worker 2] episodes_seen=2660 last_return=-73.9 (+1 eps) [worker 1] episodes_seen=2660 last_return=-97.9 (+1 eps) [worker 3] episodes_seen=2650 last_return=-94.6 (+1 eps) [worker 0] episodes_seen=2670 last_return=-104.5 (+1 eps) [worker 2] episodes_seen=2670 last_return=-93.5 (+1 eps) [worker 1] episodes_seen=2670 last_return=-79.2 (+1 eps) [worker 3] episodes_seen=2660 last_return=-104.0 (+1 eps) [worker 0] episodes_seen=2680 last_return=-104.9 (+1 eps) [worker 2] episodes_seen=2680 last_return=-95.5 (+1 eps) [worker 1] episodes_seen=2680 last_return=-86.2 (+1 eps) [worker 3] episodes_seen=2670 last_return=-91.2 (+1 eps) [worker 0] episodes_seen=2690 last_return=-80.2 (+1 eps) [worker 2] episodes_seen=2690 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2690 last_return=-100.0 (+1 eps) [A2C][sync] it= 6347 steps= 761640 (+120) avg10= -91.56 loss=20.195 pg=0.029 vf=40.392 H=0.991 gn=483.144 [worker 3] episodes_seen=2680 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2700 
last_return=-98.9 (+1 eps) [worker 2] episodes_seen=2700 last_return=-84.7 (+1 eps) [worker 1] episodes_seen=2700 last_return=-99.2 (+1 eps) [worker 3] episodes_seen=2690 last_return=-77.4 (+1 eps) [worker 0] episodes_seen=2710 last_return=-77.1 (+1 eps) [worker 2] episodes_seen=2710 last_return=-67.3 (+1 eps) [worker 1] episodes_seen=2710 last_return=-90.8 (+1 eps) [worker 3] episodes_seen=2700 last_return=-74.5 (+1 eps) [worker 0] episodes_seen=2720 last_return=-71.5 (+1 eps) [worker 2] episodes_seen=2720 last_return=-108.1 (+1 eps) [worker 1] episodes_seen=2720 last_return=-92.2 (+1 eps) [worker 0] episodes_seen=2730 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=2710 last_return=-69.1 (+1 eps) [worker 2] episodes_seen=2730 last_return=-111.0 (+1 eps) [worker 1] episodes_seen=2730 last_return=-23.5 (+1 eps) [worker 0] episodes_seen=2740 last_return=-94.1 (+1 eps) [worker 3] episodes_seen=2720 last_return=-111.8 (+1 eps) [worker 2] episodes_seen=2740 last_return=-88.4 (+1 eps) [worker 1] episodes_seen=2740 last_return=-89.5 (+1 eps) [worker 0] episodes_seen=2750 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=2730 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=2750 last_return=-111.8 (+1 eps) [worker 1] episodes_seen=2750 last_return=-79.2 (+1 eps) [worker 3] episodes_seen=2740 last_return=-78.3 (+1 eps) [worker 0] episodes_seen=2760 last_return=-99.2 (+1 eps) [worker 2] episodes_seen=2760 last_return=-88.2 (+1 eps) [worker 1] episodes_seen=2760 last_return=-70.6 (+1 eps) [worker 3] episodes_seen=2750 last_return=-70.0 (+1 eps) [worker 0] episodes_seen=2770 last_return=-83.6 (+1 eps) [worker 2] episodes_seen=2770 last_return=-90.1 (+1 eps) [worker 1] episodes_seen=2770 last_return=-68.4 (+1 eps) [worker 0] episodes_seen=2780 last_return=-109.6 (+1 eps) [worker 3] episodes_seen=2760 last_return=-84.2 (+1 eps) [worker 2] episodes_seen=2780 last_return=-116.3 (+1 eps) [worker 1] episodes_seen=2780 last_return=-82.3 (+1 eps) [worker 3] 
episodes_seen=2770 last_return=-102.6 (+1 eps) [worker 0] episodes_seen=2790 last_return=-76.2 (+1 eps) [worker 2] episodes_seen=2790 last_return=-80.4 (+1 eps) [worker 1] episodes_seen=2790 last_return=-80.4 (+1 eps) [worker 3] episodes_seen=2780 last_return=-89.6 (+1 eps) [worker 0] episodes_seen=2800 last_return=-89.9 (+1 eps) [worker 2] episodes_seen=2800 last_return=-94.4 (+1 eps) [worker 1] episodes_seen=2800 last_return=-90.2 (+1 eps) [worker 3] episodes_seen=2790 last_return=-95.5 (+1 eps) [worker 0] episodes_seen=2810 last_return=-86.1 (+1 eps) [worker 2] episodes_seen=2810 last_return=-93.0 (+1 eps) [worker 1] episodes_seen=2810 last_return=-90.1 (+1 eps) [worker 0] episodes_seen=2820 last_return=-97.2 (+1 eps) [worker 3] episodes_seen=2800 last_return=-98.7 (+1 eps) [worker 2] episodes_seen=2820 last_return=-87.4 (+1 eps) [worker 1] episodes_seen=2820 last_return=-91.1 (+1 eps) [worker 0] episodes_seen=2830 last_return=-100.9 (+1 eps) [worker 3] episodes_seen=2810 last_return=-85.5 (+1 eps) [worker 2] episodes_seen=2830 last_return=-92.9 (+1 eps) [worker 1] episodes_seen=2830 last_return=-76.3 (+1 eps) [A2C][sync] it= 6667 steps= 800040 (+120) avg10= -89.33 loss=5.758 pg=-0.105 vf=11.776 H=0.840 gn=221.209 [Checkpoint] Saved self-describing checkpoint → part2_artifacts/checkpoints/a2c_run5_seed1227.pth [A2C][sync] done: steps=800040 time=744.2s avg10=-89.33
[Run run5_seed1227] checkpoint: part2_artifacts/checkpoints/a2c_run5_seed1227.pth [Run run5_seed1227] training plot (tail 500): part2_artifacts/train_curve_run5_seed1227.png [Run run5_seed1227] training plot (full): part2_artifacts/train_curve_full_run5_seed1227.png [Run run5_seed1227] per-worker plot: part2_artifacts/train_curve_workers_run5_seed1227.png [Run run5_seed1227] workers-average plot: part2_artifacts/train_curve_workers_avg_run5_seed1227.png
[Eval run5_seed1227] mean=-120.04 std=8.48 min=-135.32 max=-105.37 [Eval run5_seed1227] CSV: part2_artifacts/eval10_run5_seed1227.csv [Eval run5_seed1227] plot: part2_artifacts/eval10_run5_seed1227.png [Best] ep=8 return=-105.37 seed=1235
/usr/local/lib/python3.12/dist-packages/gymnasium/wrappers/rendering.py:293: UserWarning: WARN: Overwriting existing videos at /content/part2_artifacts/videos/run5_seed1227 folder (try specifying a different `video_folder` for the `RecordVideo` wrapper if this is not desired)
logger.warn(
[Video run5_seed1227] episode return=-105.37 [Video run5_seed1227] saved under: part2_artifacts/videos run5_seed1227 | mean=-120.0±8.5 | best_ep=8, best_ret=-105.4
Run#6
run_id = f"run6_seed{SEED}"

# Training configuration for this run, kept in one place and passed to
# train_once as keyword arguments.
run6_train_kwargs = {
    # Parallelism and step budget. With 4 workers and T=30 each sync
    # iteration advances the step counter by 120 in the training log.
    "n_workers": 4,
    "T": 30,
    "total_env_steps": 900_000,
    # Spacing (in env steps) between the [A2C][sync] progress lines.
    "log_every": 50_000,
    # Return discounting and loss weights (presumably the usual A2C
    # discount / value / entropy coefficients; confirm in train_once).
    "gamma": 0.99,
    "value_coef": 0.55,
    "entropy_coef": 0.02,
    # Optimizer step size and gradient-norm bound.
    "lr": 7e-4,
    "max_grad_norm": 0.5,
}

# Train (multi-worker); the returned paths object carries the checkpoint
# location that the evaluation and video steps below load from.
model, logs, paths = train_once(run_id=run_id, **run6_train_kwargs)

# Fixed-seed greedy evaluation over 10 episodes from the saved checkpoint.
metrics, eval_paths = evaluate_10(run_id, paths.ckpt_path)

# Replay the best eval-10 episode using its seed and record it as a video.
video_dir, best_idx, best_ret = record_best_from_eval(run_id, paths.ckpt_path)

# One-line summary of the run: eval mean ± std and the best episode.
print(f"{run_id} | mean={metrics['mean']:.1f}±{metrics['std']:.1f} | best_ep={best_idx}, best_ret={best_ret:.1f}")
[Run run6_seed1227] starting training… [A2C][sync] start: workers=4, T=30, target_steps=900000, mp=fork [A2C][sync] it= 1 steps= 120 (+120) avg10= nan loss=119.979 pg=0.000 vf=218.195 H=1.386 gn=29.204 [worker 2] episodes_seen=10 last_return=-106.2 (+1 eps) [worker 0] episodes_seen=10 last_return=-103.6 (+1 eps) [worker 1] episodes_seen=10 last_return=-112.3 (+1 eps) [worker 3] episodes_seen=10 last_return=-121.9 (+1 eps) [worker 2] episodes_seen=20 last_return=-106.9 (+1 eps) [worker 1] episodes_seen=20 last_return=-110.6 (+1 eps) [worker 0] episodes_seen=20 last_return=-215.8 (+1 eps) [worker 3] episodes_seen=20 last_return=-128.7 (+1 eps) [worker 0] episodes_seen=30 last_return=-166.8 (+1 eps) [worker 3] episodes_seen=30 last_return=-261.1 (+1 eps) [worker 2] episodes_seen=30 last_return=-115.2 (+1 eps) [worker 1] episodes_seen=30 last_return=-250.3 (+1 eps) [worker 0] episodes_seen=40 last_return=-143.2 (+1 eps) [worker 3] episodes_seen=40 last_return=-240.7 (+1 eps) [worker 2] episodes_seen=40 last_return=-271.5 (+1 eps) [worker 1] episodes_seen=40 last_return=-272.6 (+1 eps) [worker 0] episodes_seen=50 last_return=-199.1 (+1 eps) [worker 3] episodes_seen=50 last_return=-150.4 (+1 eps) [worker 1] episodes_seen=50 last_return=-151.5 (+1 eps) [worker 2] episodes_seen=50 last_return=-353.0 (+1 eps) [worker 0] episodes_seen=60 last_return=-140.9 (+1 eps) [worker 3] episodes_seen=60 last_return=-276.4 (+1 eps) [worker 1] episodes_seen=60 last_return=-233.5 (+1 eps) [worker 2] episodes_seen=60 last_return=-153.5 (+1 eps) [worker 0] episodes_seen=70 last_return=-139.9 (+1 eps) [worker 3] episodes_seen=70 last_return=-385.2 (+1 eps) [worker 1] episodes_seen=70 last_return=-132.7 (+1 eps) [worker 2] episodes_seen=70 last_return=-265.8 (+1 eps) [worker 0] episodes_seen=80 last_return=-369.4 (+1 eps) [worker 3] episodes_seen=80 last_return=-176.2 (+1 eps) [worker 1] episodes_seen=80 last_return=-191.4 (+1 eps) [worker 2] episodes_seen=80 last_return=-295.9 (+1 eps) 
[worker 0] episodes_seen=90 last_return=-203.5 (+1 eps) [worker 3] episodes_seen=90 last_return=-254.5 (+1 eps) [worker 1] episodes_seen=90 last_return=-324.4 (+1 eps) [worker 0] episodes_seen=100 last_return=-212.0 (+1 eps) [worker 2] episodes_seen=90 last_return=-254.7 (+1 eps) [worker 3] episodes_seen=100 last_return=-332.3 (+1 eps) [worker 1] episodes_seen=100 last_return=-384.4 (+1 eps) [worker 0] episodes_seen=110 last_return=-149.0 (+1 eps) [worker 2] episodes_seen=100 last_return=-100.0 (+1 eps) [A2C][sync] it= 418 steps= 50160 (+120) avg10=-262.47 loss=119388.500 pg=-0.000 vf=217070.000 H=0.001 gn=143670.359 [worker 2] episodes_seen=110 last_return=-227.1 (+1 eps) [worker 1] episodes_seen=110 last_return=-237.2 (+1 eps) [worker 0] episodes_seen=120 last_return=-207.0 (+1 eps) [worker 3] episodes_seen=110 last_return=-310.5 (+1 eps) [worker 2] episodes_seen=120 last_return=-186.7 (+1 eps) [worker 1] episodes_seen=120 last_return=-270.1 (+1 eps) [worker 0] episodes_seen=130 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=120 last_return=-151.3 (+1 eps) [worker 2] episodes_seen=130 last_return=-203.0 (+1 eps) [worker 0] episodes_seen=140 last_return=-173.0 (+1 eps) [worker 1] episodes_seen=130 last_return=-201.3 (+1 eps) [worker 3] episodes_seen=130 last_return=-284.7 (+1 eps) [worker 2] episodes_seen=140 last_return=-217.2 (+1 eps) [worker 0] episodes_seen=150 last_return=-321.9 (+1 eps) [worker 1] episodes_seen=140 last_return=-269.1 (+1 eps) [worker 3] episodes_seen=140 last_return=-336.1 (+1 eps) [worker 2] episodes_seen=150 last_return=-301.2 (+1 eps) [worker 0] episodes_seen=160 last_return=-174.8 (+1 eps) [worker 3] episodes_seen=150 last_return=-343.8 (+1 eps) [worker 1] episodes_seen=150 last_return=-226.0 (+1 eps) [worker 2] episodes_seen=160 last_return=-202.1 (+1 eps) [worker 0] episodes_seen=170 last_return=-133.7 (+1 eps) [worker 3] episodes_seen=160 last_return=-162.0 (+1 eps) [worker 1] episodes_seen=160 last_return=-255.9 (+1 eps) 
[worker 2] episodes_seen=170 last_return=-119.7 (+1 eps) [worker 0] episodes_seen=180 last_return=-404.6 (+1 eps) [worker 1] episodes_seen=170 last_return=-340.5 (+1 eps) [worker 3] episodes_seen=170 last_return=-164.6 (+1 eps) [worker 2] episodes_seen=180 last_return=-209.1 (+1 eps) [worker 0] episodes_seen=190 last_return=-236.4 (+1 eps) [worker 3] episodes_seen=180 last_return=-114.5 (+1 eps) [worker 1] episodes_seen=180 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=190 last_return=-253.6 (+1 eps) [worker 0] episodes_seen=200 last_return=-187.1 (+1 eps) [worker 3] episodes_seen=190 last_return=-142.7 (+1 eps) [worker 1] episodes_seen=190 last_return=-221.7 (+1 eps) [worker 2] episodes_seen=200 last_return=-155.8 (+1 eps) [worker 0] episodes_seen=210 last_return=-163.4 (+1 eps) [worker 3] episodes_seen=200 last_return=-227.0 (+1 eps) [worker 1] episodes_seen=200 last_return=-224.1 (+1 eps) [worker 0] episodes_seen=220 last_return=-215.0 (+1 eps) [A2C][sync] it= 835 steps= 100200 (+120) avg10=-234.09 loss=519.586 pg=0.000 vf=944.702 H=0.002 gn=17701.820 [worker 2] episodes_seen=210 last_return=-435.7 (+1 eps) [worker 1] episodes_seen=210 last_return=-199.5 (+1 eps) [worker 3] episodes_seen=210 last_return=-133.3 (+1 eps) [worker 0] episodes_seen=230 last_return=-107.6 (+1 eps) [worker 2] episodes_seen=220 last_return=-342.0 (+1 eps) [worker 1] episodes_seen=220 last_return=-307.7 (+1 eps) [worker 3] episodes_seen=220 last_return=-294.8 (+1 eps) [worker 0] episodes_seen=240 last_return=-320.1 (+1 eps) [worker 2] episodes_seen=230 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=230 last_return=-370.0 (+1 eps) [worker 3] episodes_seen=230 last_return=-195.9 (+1 eps) [worker 0] episodes_seen=250 last_return=-272.1 (+1 eps) [worker 2] episodes_seen=240 last_return=-186.7 (+1 eps) [worker 3] episodes_seen=240 last_return=-189.3 (+1 eps) [worker 1] episodes_seen=240 last_return=-163.6 (+1 eps) [worker 0] episodes_seen=260 last_return=-291.5 (+1 eps) [worker 
2] episodes_seen=250 last_return=-272.3 (+1 eps) [worker 3] episodes_seen=250 last_return=-287.0 (+1 eps) [worker 1] episodes_seen=250 last_return=-249.1 (+1 eps) [worker 3] episodes_seen=260 last_return=-304.8 (+1 eps) [worker 0] episodes_seen=270 last_return=-294.0 (+1 eps) [worker 2] episodes_seen=260 last_return=-191.0 (+1 eps) [worker 1] episodes_seen=260 last_return=-265.3 (+1 eps) [worker 0] episodes_seen=280 last_return=-201.4 (+1 eps) [worker 2] episodes_seen=270 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=270 last_return=-268.1 (+1 eps) [worker 1] episodes_seen=270 last_return=-225.9 (+1 eps) [worker 2] episodes_seen=280 last_return=-314.5 (+1 eps) [worker 3] episodes_seen=280 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=290 last_return=-152.7 (+1 eps) [worker 1] episodes_seen=280 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=290 last_return=-216.1 (+1 eps) [worker 0] episodes_seen=300 last_return=-126.0 (+1 eps) [worker 3] episodes_seen=290 last_return=-222.1 (+1 eps) [worker 1] episodes_seen=290 last_return=-449.5 (+1 eps) [worker 2] episodes_seen=300 last_return=-270.9 (+1 eps) [worker 0] episodes_seen=310 last_return=-106.5 (+1 eps) [worker 1] episodes_seen=300 last_return=-206.3 (+1 eps) [worker 3] episodes_seen=300 last_return=-162.9 (+1 eps) [worker 2] episodes_seen=310 last_return=-306.1 (+1 eps) [worker 0] episodes_seen=320 last_return=-266.5 (+1 eps) [A2C][sync] it= 1252 steps= 150240 (+120) avg10=-204.83 loss=52155.879 pg=-0.000 vf=94828.867 H=0.001 gn=60606.367 [worker 3] episodes_seen=310 last_return=-117.6 (+1 eps) [worker 1] episodes_seen=310 last_return=-108.1 (+1 eps) [worker 2] episodes_seen=320 last_return=-113.7 (+1 eps) [worker 0] episodes_seen=330 last_return=-176.3 (+1 eps) [worker 1] episodes_seen=320 last_return=-320.3 (+1 eps) [worker 2] episodes_seen=330 last_return=-171.5 (+1 eps) [worker 3] episodes_seen=320 last_return=-190.0 (+1 eps) [worker 0] episodes_seen=340 last_return=-100.0 (+1 eps) [worker 1] 
episodes_seen=330 last_return=-213.9 (+1 eps) [worker 3] episodes_seen=330 last_return=-368.2 (+1 eps) [worker 2] episodes_seen=340 last_return=-198.7 (+1 eps) [worker 0] episodes_seen=350 last_return=-299.8 (+1 eps) [worker 1] episodes_seen=340 last_return=-110.8 (+1 eps) [worker 3] episodes_seen=340 last_return=-158.1 (+1 eps) [worker 2] episodes_seen=350 last_return=-244.5 (+1 eps) [worker 0] episodes_seen=360 last_return=-274.0 (+1 eps) [worker 1] episodes_seen=350 last_return=-196.3 (+1 eps) [worker 3] episodes_seen=350 last_return=-134.1 (+1 eps) [worker 2] episodes_seen=360 last_return=-155.1 (+1 eps) [worker 0] episodes_seen=370 last_return=-259.3 (+1 eps) [worker 1] episodes_seen=360 last_return=-447.8 (+1 eps) [worker 3] episodes_seen=360 last_return=-113.0 (+1 eps) [worker 2] episodes_seen=370 last_return=-346.0 (+1 eps) [worker 3] episodes_seen=370 last_return=-323.3 (+1 eps) [worker 0] episodes_seen=380 last_return=-300.3 (+1 eps) [worker 1] episodes_seen=370 last_return=-166.3 (+1 eps) [worker 2] episodes_seen=380 last_return=-123.6 (+1 eps) [worker 1] episodes_seen=380 last_return=-230.1 (+1 eps) [worker 3] episodes_seen=380 last_return=-288.2 (+1 eps) [worker 2] episodes_seen=390 last_return=-135.2 (+1 eps) [worker 0] episodes_seen=390 last_return=-162.8 (+1 eps) [worker 3] episodes_seen=390 last_return=-298.2 (+1 eps) [worker 1] episodes_seen=390 last_return=-273.2 (+1 eps) [worker 2] episodes_seen=400 last_return=-371.5 (+1 eps) [worker 0] episodes_seen=400 last_return=-322.4 (+1 eps) [worker 3] episodes_seen=400 last_return=-107.0 (+1 eps) [worker 2] episodes_seen=410 last_return=-118.4 (+1 eps) [worker 0] episodes_seen=410 last_return=-114.3 (+1 eps) [worker 1] episodes_seen=400 last_return=-174.2 (+1 eps) [worker 3] episodes_seen=410 last_return=-200.7 (+1 eps) [worker 2] episodes_seen=420 last_return=-100.0 (+1 eps) [A2C][sync] it= 1669 steps= 200280 (+120) avg10=-220.39 loss=87315.516 pg=-0.000 vf=158755.484 H=0.001 gn=114262.719 [worker 0] 
episodes_seen=420 last_return=-189.0 (+1 eps) [worker 1] episodes_seen=410 last_return=-275.3 (+1 eps) [worker 3] episodes_seen=420 last_return=-139.0 (+1 eps) [worker 0] episodes_seen=430 last_return=-235.1 (+1 eps) [worker 2] episodes_seen=430 last_return=-141.3 (+1 eps) [worker 1] episodes_seen=420 last_return=-111.7 (+1 eps) [worker 3] episodes_seen=430 last_return=-342.6 (+1 eps) [worker 1] episodes_seen=430 last_return=-232.6 (+1 eps) [worker 2] episodes_seen=440 last_return=-200.1 (+1 eps) [worker 0] episodes_seen=440 last_return=-209.3 (+1 eps) [worker 3] episodes_seen=440 last_return=-423.2 (+1 eps) [worker 2] episodes_seen=450 last_return=-221.5 (+1 eps) [worker 0] episodes_seen=450 last_return=-298.3 (+1 eps) [worker 1] episodes_seen=440 last_return=-121.5 (+1 eps) [worker 3] episodes_seen=450 last_return=-135.3 (+1 eps) [worker 2] episodes_seen=460 last_return=-290.5 (+1 eps) [worker 1] episodes_seen=450 last_return=-118.1 (+1 eps) [worker 0] episodes_seen=460 last_return=-235.1 (+1 eps) [worker 3] episodes_seen=460 last_return=-248.4 (+1 eps) [worker 0] episodes_seen=470 last_return=-135.0 (+1 eps) [worker 2] episodes_seen=470 last_return=-233.5 (+1 eps) [worker 3] episodes_seen=470 last_return=-132.9 (+1 eps) [worker 1] episodes_seen=460 last_return=-144.6 (+1 eps) [worker 0] episodes_seen=480 last_return=-194.1 (+1 eps) [worker 2] episodes_seen=480 last_return=-266.1 (+1 eps) [worker 3] episodes_seen=480 last_return=-397.4 (+1 eps) [worker 1] episodes_seen=470 last_return=-116.6 (+1 eps) [worker 0] episodes_seen=490 last_return=-121.1 (+1 eps) [worker 2] episodes_seen=490 last_return=-322.2 (+1 eps) [worker 3] episodes_seen=490 last_return=-133.5 (+1 eps) [worker 1] episodes_seen=480 last_return=-252.5 (+1 eps) [worker 0] episodes_seen=500 last_return=-167.1 (+1 eps) [worker 2] episodes_seen=500 last_return=-110.1 (+1 eps) [worker 3] episodes_seen=500 last_return=-227.6 (+1 eps) [worker 1] episodes_seen=490 last_return=-273.7 (+1 eps) [worker 0] 
episodes_seen=510 last_return=-175.2 (+1 eps) [worker 2] episodes_seen=510 last_return=-346.3 (+1 eps) [worker 3] episodes_seen=510 last_return=-120.8 (+1 eps) [worker 1] episodes_seen=500 last_return=-310.7 (+1 eps) [worker 0] episodes_seen=520 last_return=-100.0 (+1 eps) [A2C][sync] it= 2086 steps= 250320 (+120) avg10=-244.47 loss=46538.379 pg=-0.000 vf=84615.234 H=0.000 gn=51871.359 [worker 2] episodes_seen=520 last_return=-232.3 (+1 eps) [worker 3] episodes_seen=520 last_return=-172.9 (+1 eps) [worker 1] episodes_seen=510 last_return=-223.7 (+1 eps) [worker 0] episodes_seen=530 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=530 last_return=-107.1 (+1 eps) [worker 3] episodes_seen=530 last_return=-134.4 (+1 eps) [worker 0] episodes_seen=540 last_return=-149.3 (+1 eps) [worker 1] episodes_seen=520 last_return=-204.8 (+1 eps) [worker 2] episodes_seen=540 last_return=-298.6 (+1 eps) [worker 3] episodes_seen=540 last_return=-296.8 (+1 eps) [worker 0] episodes_seen=550 last_return=-145.6 (+1 eps) [worker 1] episodes_seen=530 last_return=-214.2 (+1 eps) [worker 2] episodes_seen=550 last_return=-278.9 (+1 eps) [worker 0] episodes_seen=560 last_return=-282.6 (+1 eps) [worker 3] episodes_seen=550 last_return=-270.2 (+1 eps) [worker 1] episodes_seen=540 last_return=-207.3 (+1 eps) [worker 2] episodes_seen=560 last_return=-194.7 (+1 eps) [worker 0] episodes_seen=570 last_return=-292.7 (+1 eps) [worker 3] episodes_seen=560 last_return=-125.6 (+1 eps) [worker 1] episodes_seen=550 last_return=-303.7 (+1 eps) [worker 0] episodes_seen=580 last_return=-269.2 (+1 eps) [worker 2] episodes_seen=570 last_return=-404.6 (+1 eps) [worker 3] episodes_seen=570 last_return=-213.8 (+1 eps) [worker 1] episodes_seen=560 last_return=-257.2 (+1 eps) [worker 0] episodes_seen=590 last_return=-296.3 (+1 eps) [worker 2] episodes_seen=580 last_return=-132.4 (+1 eps) [worker 3] episodes_seen=580 last_return=-179.6 (+1 eps) [worker 1] episodes_seen=570 last_return=-129.9 (+1 eps) [worker 0] 
episodes_seen=600 last_return=-338.5 (+1 eps) [worker 2] episodes_seen=590 last_return=-335.6 (+1 eps) [worker 1] episodes_seen=580 last_return=-154.2 (+1 eps) [worker 3] episodes_seen=590 last_return=-376.6 (+1 eps) [worker 0] episodes_seen=610 last_return=-296.1 (+1 eps) [worker 2] episodes_seen=600 last_return=-365.2 (+1 eps) [worker 3] episodes_seen=600 last_return=-270.8 (+1 eps) [worker 1] episodes_seen=590 last_return=-251.3 (+1 eps) [worker 0] episodes_seen=620 last_return=-125.2 (+1 eps) [worker 1] episodes_seen=600 last_return=-222.8 (+1 eps) [worker 2] episodes_seen=610 last_return=-206.0 (+1 eps) [worker 3] episodes_seen=610 last_return=-221.3 (+1 eps) [worker 0] episodes_seen=630 last_return=-334.3 (+1 eps) [A2C][sync] it= 2503 steps= 300360 (+120) avg10=-235.60 loss=65826.555 pg=-0.000 vf=119684.641 H=0.000 gn=136118.328 [worker 1] episodes_seen=610 last_return=-131.5 (+1 eps) [worker 2] episodes_seen=620 last_return=-229.7 (+1 eps) [worker 3] episodes_seen=620 last_return=-109.2 (+1 eps) [worker 0] episodes_seen=640 last_return=-229.8 (+1 eps) [worker 2] episodes_seen=630 last_return=-140.7 (+1 eps) [worker 1] episodes_seen=620 last_return=-105.9 (+1 eps) [worker 3] episodes_seen=630 last_return=-138.3 (+1 eps) [worker 0] episodes_seen=650 last_return=-203.7 (+1 eps) [worker 2] episodes_seen=640 last_return=-304.0 (+1 eps) [worker 1] episodes_seen=630 last_return=-109.4 (+1 eps) [worker 3] episodes_seen=640 last_return=-118.2 (+1 eps) [worker 0] episodes_seen=660 last_return=-361.8 (+1 eps) [worker 2] episodes_seen=650 last_return=-178.2 (+1 eps) [worker 3] episodes_seen=650 last_return=-236.0 (+1 eps) [worker 1] episodes_seen=640 last_return=-361.5 (+1 eps) [worker 2] episodes_seen=660 last_return=-154.4 (+1 eps) [worker 0] episodes_seen=670 last_return=-126.5 (+1 eps) [worker 3] episodes_seen=660 last_return=-216.5 (+1 eps) [worker 1] episodes_seen=650 last_return=-287.3 (+1 eps) [worker 2] episodes_seen=670 last_return=-149.0 (+1 eps) [worker 0] 
episodes_seen=680 last_return=-408.4 (+1 eps) [worker 1] episodes_seen=660 last_return=-213.2 (+1 eps) [worker 3] episodes_seen=670 last_return=-130.2 (+1 eps) [worker 0] episodes_seen=690 last_return=-303.3 (+1 eps) [worker 2] episodes_seen=680 last_return=-119.7 (+1 eps) [worker 3] episodes_seen=680 last_return=-331.9 (+1 eps) [worker 1] episodes_seen=670 last_return=-175.5 (+1 eps) [worker 1] episodes_seen=680 last_return=-261.8 (+1 eps) [worker 3] episodes_seen=690 last_return=-399.2 (+1 eps) [worker 2] episodes_seen=690 last_return=-303.7 (+1 eps) [worker 0] episodes_seen=700 last_return=-247.4 (+1 eps) [worker 1] episodes_seen=690 last_return=-318.2 (+1 eps) [worker 3] episodes_seen=700 last_return=-286.5 (+1 eps) [worker 0] episodes_seen=710 last_return=-175.2 (+1 eps) [worker 2] episodes_seen=700 last_return=-203.2 (+1 eps) [worker 1] episodes_seen=700 last_return=-144.9 (+1 eps) [worker 3] episodes_seen=710 last_return=-295.2 (+1 eps) [worker 0] episodes_seen=720 last_return=-213.8 (+1 eps) [worker 2] episodes_seen=710 last_return=-129.2 (+1 eps) [A2C][sync] it= 2920 steps= 350400 (+120) avg10=-242.61 loss=470.838 pg=-0.004 vf=856.077 H=0.024 gn=5931.721 [worker 1] episodes_seen=710 last_return=-221.3 (+1 eps) [worker 0] episodes_seen=730 last_return=-325.1 (+1 eps) [worker 3] episodes_seen=720 last_return=-297.5 (+1 eps) [worker 2] episodes_seen=720 last_return=-171.5 (+1 eps) [worker 1] episodes_seen=720 last_return=-283.0 (+1 eps) [worker 3] episodes_seen=730 last_return=-126.5 (+1 eps) [worker 0] episodes_seen=740 last_return=-156.2 (+1 eps) [worker 2] episodes_seen=730 last_return=-267.4 (+1 eps) [worker 3] episodes_seen=740 last_return=-205.3 (+1 eps) [worker 2] episodes_seen=740 last_return=-197.6 (+1 eps) [worker 1] episodes_seen=730 last_return=-251.4 (+1 eps) [worker 0] episodes_seen=750 last_return=-124.7 (+1 eps) [worker 3] episodes_seen=750 last_return=-260.5 (+1 eps) [worker 2] episodes_seen=750 last_return=-117.6 (+1 eps) [worker 0] 
episodes_seen=760 last_return=-121.7 (+1 eps) [worker 1] episodes_seen=740 last_return=-119.7 (+1 eps) [worker 2] episodes_seen=760 last_return=-224.5 (+1 eps) [worker 3] episodes_seen=760 last_return=-210.2 (+1 eps) [worker 0] episodes_seen=770 last_return=-318.3 (+1 eps) [worker 1] episodes_seen=750 last_return=-133.4 (+1 eps) [worker 3] episodes_seen=770 last_return=-91.1 (+1 eps) [worker 2] episodes_seen=770 last_return=-74.3 (+1 eps) [worker 0] episodes_seen=780 last_return=-184.8 (+1 eps) [worker 1] episodes_seen=760 last_return=-63.3 (+1 eps) [worker 2] episodes_seen=780 last_return=-73.3 (+1 eps) [worker 3] episodes_seen=780 last_return=-134.2 (+1 eps) [worker 0] episodes_seen=790 last_return=-31.1 (+1 eps) [worker 1] episodes_seen=770 last_return=-108.6 (+1 eps) [worker 3] episodes_seen=790 last_return=-106.5 (+1 eps) [worker 2] episodes_seen=790 last_return=-102.5 (+1 eps) [worker 0] episodes_seen=800 last_return=-86.1 (+1 eps) [worker 1] episodes_seen=780 last_return=-83.8 (+1 eps) [worker 3] episodes_seen=800 last_return=-126.8 (+1 eps) [worker 2] episodes_seen=800 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=790 last_return=-116.3 (+1 eps) [worker 0] episodes_seen=810 last_return=-77.5 (+1 eps) [worker 3] episodes_seen=810 last_return=-71.6 (+1 eps) [worker 2] episodes_seen=810 last_return=-118.4 (+1 eps) [worker 1] episodes_seen=800 last_return=-81.4 (+1 eps) [worker 0] episodes_seen=820 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=820 last_return=-106.9 (+1 eps) [worker 3] episodes_seen=820 last_return=-94.6 (+1 eps) [worker 0] episodes_seen=830 last_return=-111.1 (+1 eps) [worker 1] episodes_seen=810 last_return=-115.8 (+1 eps) [worker 2] episodes_seen=830 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=830 last_return=42.9 (+1 eps) [worker 1] episodes_seen=820 last_return=-92.7 (+1 eps) [worker 0] episodes_seen=840 last_return=-89.8 (+1 eps) [worker 2] episodes_seen=840 last_return=-114.0 (+1 eps) [worker 3] episodes_seen=840 
last_return=-99.5 (+1 eps) [worker 1] episodes_seen=830 last_return=-97.8 (+1 eps) [worker 0] episodes_seen=850 last_return=-104.4 (+1 eps) [A2C][sync] it= 3337 steps= 400440 (+120) avg10=-173.08 loss=1310.849 pg=-0.032 vf=2383.454 H=0.946 gn=2012.021 [worker 3] episodes_seen=850 last_return=-223.4 (+1 eps) [worker 2] episodes_seen=850 last_return=-197.3 (+1 eps) [worker 1] episodes_seen=840 last_return=-211.3 (+1 eps) [worker 0] episodes_seen=860 last_return=-203.1 (+1 eps) [worker 3] episodes_seen=860 last_return=-278.7 (+1 eps) [worker 2] episodes_seen=860 last_return=-131.6 (+1 eps) [worker 1] episodes_seen=850 last_return=-188.7 (+1 eps) [worker 0] episodes_seen=870 last_return=-238.3 (+1 eps) [worker 2] episodes_seen=870 last_return=-224.9 (+1 eps) [worker 3] episodes_seen=870 last_return=-198.8 (+1 eps) [worker 1] episodes_seen=860 last_return=-124.1 (+1 eps) [worker 0] episodes_seen=880 last_return=-133.2 (+1 eps) [worker 3] episodes_seen=880 last_return=-103.8 (+1 eps) [worker 2] episodes_seen=880 last_return=-91.2 (+1 eps) [worker 1] episodes_seen=870 last_return=-101.0 (+1 eps) [worker 0] episodes_seen=890 last_return=-125.0 (+1 eps) [worker 3] episodes_seen=890 last_return=-397.0 (+1 eps) [worker 2] episodes_seen=890 last_return=-122.2 (+1 eps) [worker 1] episodes_seen=880 last_return=-162.6 (+1 eps) [worker 0] episodes_seen=900 last_return=-196.3 (+1 eps) [worker 3] episodes_seen=900 last_return=-27.7 (+1 eps) [worker 1] episodes_seen=890 last_return=-429.1 (+1 eps) [worker 2] episodes_seen=900 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=910 last_return=-128.5 (+1 eps) [worker 3] episodes_seen=910 last_return=-149.9 (+1 eps) [worker 1] episodes_seen=900 last_return=-217.7 (+1 eps) [worker 2] episodes_seen=910 last_return=-192.7 (+1 eps) [worker 0] episodes_seen=920 last_return=-221.6 (+1 eps) [worker 3] episodes_seen=920 last_return=-110.2 (+1 eps) [worker 1] episodes_seen=910 last_return=-105.7 (+1 eps) [worker 2] episodes_seen=920 
last_return=-109.3 (+1 eps) [worker 0] episodes_seen=930 last_return=-61.5 (+1 eps) [worker 1] episodes_seen=920 last_return=-227.8 (+1 eps) [worker 2] episodes_seen=930 last_return=-296.4 (+1 eps) [worker 3] episodes_seen=930 last_return=-110.7 (+1 eps) [worker 0] episodes_seen=940 last_return=-146.1 (+1 eps) [worker 2] episodes_seen=940 last_return=-66.9 (+1 eps) [worker 1] episodes_seen=930 last_return=-243.3 (+1 eps) [worker 3] episodes_seen=940 last_return=-244.2 (+1 eps) [worker 0] episodes_seen=950 last_return=-107.0 (+1 eps) [worker 2] episodes_seen=950 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=940 last_return=-106.5 (+1 eps) [worker 0] episodes_seen=960 last_return=-86.9 (+1 eps) [worker 3] episodes_seen=950 last_return=-83.9 (+1 eps) [worker 2] episodes_seen=960 last_return=-121.3 (+1 eps) [worker 1] episodes_seen=950 last_return=-177.1 (+1 eps) [worker 0] episodes_seen=970 last_return=-124.4 (+1 eps) [worker 3] episodes_seen=960 last_return=-120.0 (+1 eps) [worker 2] episodes_seen=970 last_return=-107.8 (+1 eps) [worker 1] episodes_seen=960 last_return=39.0 (+1 eps) [worker 0] episodes_seen=980 last_return=-133.9 (+1 eps) [worker 3] episodes_seen=970 last_return=-71.3 (+1 eps) [worker 2] episodes_seen=980 last_return=-119.0 (+1 eps) [worker 1] episodes_seen=970 last_return=-90.4 (+1 eps) [worker 0] episodes_seen=990 last_return=-266.0 (+1 eps) [worker 3] episodes_seen=980 last_return=-162.4 (+1 eps) [worker 2] episodes_seen=990 last_return=-275.5 (+1 eps) [worker 1] episodes_seen=980 last_return=-238.4 (+1 eps) [worker 0] episodes_seen=1000 last_return=-107.2 (+1 eps) [worker 3] episodes_seen=990 last_return=-284.7 (+1 eps) [worker 2] episodes_seen=1000 last_return=-217.8 (+1 eps) [A2C][sync] it= 3754 steps= 450480 (+120) avg10=-162.92 loss=346.543 pg=-0.045 vf=630.189 H=0.771 gn=2696.229 [worker 0] episodes_seen=1010 last_return=-181.7 (+1 eps) [worker 1] episodes_seen=990 last_return=-215.5 (+1 eps) [worker 3] episodes_seen=1000 
last_return=-197.6 (+1 eps) [worker 2] episodes_seen=1010 last_return=-59.3 (+1 eps) [worker 0] episodes_seen=1020 last_return=-352.7 (+1 eps) [worker 1] episodes_seen=1000 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=1010 last_return=-342.3 (+1 eps) [worker 2] episodes_seen=1020 last_return=-128.0 (+1 eps) [worker 1] episodes_seen=1010 last_return=-284.9 (+1 eps) [worker 0] episodes_seen=1030 last_return=-287.2 (+1 eps) [worker 3] episodes_seen=1020 last_return=-236.0 (+1 eps) [worker 2] episodes_seen=1030 last_return=-238.1 (+1 eps) [worker 0] episodes_seen=1040 last_return=-397.9 (+1 eps) [worker 1] episodes_seen=1020 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=1030 last_return=-133.8 (+1 eps) [worker 2] episodes_seen=1040 last_return=-179.1 (+1 eps) [worker 1] episodes_seen=1030 last_return=41.0 (+1 eps) [worker 2] episodes_seen=1050 last_return=-75.8 (+1 eps) [worker 0] episodes_seen=1050 last_return=-131.7 (+1 eps) [worker 3] episodes_seen=1040 last_return=-172.6 (+1 eps) [worker 2] episodes_seen=1060 last_return=-76.6 (+1 eps) [worker 1] episodes_seen=1040 last_return=-93.8 (+1 eps) [worker 0] episodes_seen=1060 last_return=-149.4 (+1 eps) [worker 3] episodes_seen=1050 last_return=-189.0 (+1 eps) [worker 2] episodes_seen=1070 last_return=-129.8 (+1 eps) [worker 1] episodes_seen=1050 last_return=-95.0 (+1 eps) [worker 0] episodes_seen=1070 last_return=-87.5 (+1 eps) [worker 3] episodes_seen=1060 last_return=-92.5 (+1 eps) [worker 2] episodes_seen=1080 last_return=-166.2 (+1 eps) [worker 1] episodes_seen=1060 last_return=-103.7 (+1 eps) [worker 0] episodes_seen=1080 last_return=-83.6 (+1 eps) [worker 3] episodes_seen=1070 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1070 last_return=-79.6 (+1 eps) [worker 0] episodes_seen=1090 last_return=-82.7 (+1 eps) [worker 2] episodes_seen=1090 last_return=-89.0 (+1 eps) [worker 3] episodes_seen=1080 last_return=-213.7 (+1 eps) [worker 1] episodes_seen=1080 last_return=-144.2 (+1 eps) [worker 0] 
episodes_seen=1100 last_return=-128.4 (+1 eps) [worker 2] episodes_seen=1100 last_return=-123.6 (+1 eps) [worker 3] episodes_seen=1090 last_return=-83.5 (+1 eps) [worker 1] episodes_seen=1090 last_return=-120.4 (+1 eps) [worker 0] episodes_seen=1110 last_return=-101.7 (+1 eps) [worker 2] episodes_seen=1110 last_return=-96.3 (+1 eps) [worker 1] episodes_seen=1100 last_return=-220.6 (+1 eps) [worker 3] episodes_seen=1100 last_return=-185.3 (+1 eps) [worker 0] episodes_seen=1120 last_return=-214.4 (+1 eps) [worker 2] episodes_seen=1120 last_return=-201.5 (+1 eps) [worker 3] episodes_seen=1110 last_return=-95.1 (+1 eps) [worker 1] episodes_seen=1110 last_return=-81.5 (+1 eps) [worker 0] episodes_seen=1130 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1130 last_return=-293.0 (+1 eps) [worker 3] episodes_seen=1120 last_return=-222.5 (+1 eps) [worker 1] episodes_seen=1120 last_return=-102.6 (+1 eps) [worker 0] episodes_seen=1140 last_return=-335.2 (+1 eps) [worker 2] episodes_seen=1140 last_return=-334.7 (+1 eps) [worker 1] episodes_seen=1130 last_return=-283.6 (+1 eps) [worker 3] episodes_seen=1130 last_return=-177.7 (+1 eps) [worker 0] episodes_seen=1150 last_return=-126.2 (+1 eps) [A2C][sync] it= 4171 steps= 500520 (+120) avg10=-111.62 loss=183.499 pg=0.145 vf=333.396 H=0.688 gn=906.168 [worker 2] episodes_seen=1150 last_return=-124.1 (+1 eps) [worker 1] episodes_seen=1140 last_return=-123.4 (+1 eps) [worker 3] episodes_seen=1140 last_return=-145.4 (+1 eps) [worker 0] episodes_seen=1160 last_return=-397.2 (+1 eps) [worker 2] episodes_seen=1160 last_return=-150.9 (+1 eps) [worker 3] episodes_seen=1150 last_return=-135.4 (+1 eps) [worker 1] episodes_seen=1150 last_return=-129.4 (+1 eps) [worker 0] episodes_seen=1170 last_return=-104.8 (+1 eps) [worker 2] episodes_seen=1170 last_return=-140.5 (+1 eps) [worker 1] episodes_seen=1160 last_return=-164.1 (+1 eps) [worker 3] episodes_seen=1160 last_return=-94.3 (+1 eps) [worker 0] episodes_seen=1180 last_return=-196.4 
(+1 eps) [worker 2] episodes_seen=1180 last_return=-209.7 (+1 eps) [worker 3] episodes_seen=1170 last_return=-254.0 (+1 eps) [worker 1] episodes_seen=1170 last_return=-113.8 (+1 eps) [worker 0] episodes_seen=1190 last_return=-113.4 (+1 eps) [worker 2] episodes_seen=1190 last_return=-262.7 (+1 eps) [worker 3] episodes_seen=1180 last_return=-140.9 (+1 eps) [worker 0] episodes_seen=1200 last_return=-134.2 (+1 eps) [worker 1] episodes_seen=1180 last_return=-199.9 (+1 eps) [worker 2] episodes_seen=1200 last_return=-90.6 (+1 eps) [worker 3] episodes_seen=1190 last_return=-89.2 (+1 eps) [worker 0] episodes_seen=1210 last_return=-102.9 (+1 eps) [worker 1] episodes_seen=1190 last_return=-68.3 (+1 eps) [worker 2] episodes_seen=1210 last_return=-70.7 (+1 eps) [worker 3] episodes_seen=1200 last_return=-105.6 (+1 eps) [worker 0] episodes_seen=1220 last_return=-144.2 (+1 eps) [worker 1] episodes_seen=1200 last_return=-75.6 (+1 eps) [worker 3] episodes_seen=1210 last_return=-109.8 (+1 eps) [worker 2] episodes_seen=1220 last_return=-54.3 (+1 eps) [worker 1] episodes_seen=1210 last_return=-165.7 (+1 eps) [worker 0] episodes_seen=1230 last_return=-103.9 (+1 eps) [worker 3] episodes_seen=1220 last_return=-176.6 (+1 eps) [worker 2] episodes_seen=1230 last_return=-109.6 (+1 eps) [worker 1] episodes_seen=1220 last_return=-99.6 (+1 eps) [worker 0] episodes_seen=1240 last_return=-107.0 (+1 eps) [worker 3] episodes_seen=1230 last_return=-50.9 (+1 eps) [worker 2] episodes_seen=1240 last_return=-105.8 (+1 eps) [worker 1] episodes_seen=1230 last_return=-90.9 (+1 eps) [worker 0] episodes_seen=1250 last_return=-107.8 (+1 eps) [worker 3] episodes_seen=1240 last_return=-153.2 (+1 eps) [worker 2] episodes_seen=1250 last_return=-113.6 (+1 eps) [worker 0] episodes_seen=1260 last_return=-82.6 (+1 eps) [worker 1] episodes_seen=1240 last_return=-77.1 (+1 eps) [worker 3] episodes_seen=1250 last_return=-55.1 (+1 eps) [worker 2] episodes_seen=1260 last_return=-81.7 (+1 eps) [worker 0] episodes_seen=1270 
last_return=-157.6 (+1 eps) [worker 1] episodes_seen=1250 last_return=-83.0 (+1 eps) [worker 3] episodes_seen=1260 last_return=-49.4 (+1 eps) [worker 1] episodes_seen=1260 last_return=-241.8 (+1 eps) [worker 2] episodes_seen=1270 last_return=-76.1 (+1 eps) [worker 0] episodes_seen=1280 last_return=-87.9 (+1 eps) [A2C][sync] it= 4588 steps= 550560 (+120) avg10=-143.76 loss=346.850 pg=-0.034 vf=630.719 H=0.573 gn=1526.125 [worker 3] episodes_seen=1270 last_return=-45.6 (+1 eps) [worker 1] episodes_seen=1270 last_return=-95.6 (+1 eps) [worker 2] episodes_seen=1280 last_return=-99.5 (+1 eps) [worker 0] episodes_seen=1290 last_return=-173.9 (+1 eps) [worker 3] episodes_seen=1280 last_return=-90.1 (+1 eps) [worker 2] episodes_seen=1290 last_return=-71.4 (+1 eps) [worker 0] episodes_seen=1300 last_return=-104.2 (+1 eps) [worker 1] episodes_seen=1280 last_return=-110.8 (+1 eps) [worker 3] episodes_seen=1290 last_return=-101.8 (+1 eps) [worker 2] episodes_seen=1300 last_return=-80.2 (+1 eps) [worker 0] episodes_seen=1310 last_return=-50.6 (+1 eps) [worker 1] episodes_seen=1290 last_return=-69.9 (+1 eps) [worker 3] episodes_seen=1300 last_return=-42.9 (+1 eps) [worker 2] episodes_seen=1310 last_return=-251.8 (+1 eps) [worker 0] episodes_seen=1320 last_return=-78.2 (+1 eps) [worker 1] episodes_seen=1300 last_return=15.7 (+1 eps) [worker 3] episodes_seen=1310 last_return=-121.7 (+1 eps) [worker 2] episodes_seen=1320 last_return=-44.7 (+1 eps) [worker 0] episodes_seen=1330 last_return=-92.5 (+1 eps) [worker 1] episodes_seen=1310 last_return=-43.1 (+1 eps) [worker 3] episodes_seen=1320 last_return=34.7 (+1 eps) [worker 2] episodes_seen=1330 last_return=-18.9 (+1 eps) [worker 0] episodes_seen=1340 last_return=-93.3 (+1 eps) [worker 1] episodes_seen=1320 last_return=-106.7 (+1 eps) [worker 3] episodes_seen=1330 last_return=-57.5 (+1 eps) [worker 2] episodes_seen=1340 last_return=-94.4 (+1 eps) [worker 0] episodes_seen=1350 last_return=-120.2 (+1 eps) [worker 1] episodes_seen=1330 
last_return=-127.2 (+1 eps) [worker 3] episodes_seen=1340 last_return=-278.1 (+1 eps) [worker 2] episodes_seen=1350 last_return=-144.1 (+1 eps) [worker 0] episodes_seen=1360 last_return=-416.8 (+1 eps) [worker 1] episodes_seen=1340 last_return=-341.2 (+1 eps) [worker 3] episodes_seen=1350 last_return=-296.7 (+1 eps) [worker 2] episodes_seen=1360 last_return=-180.6 (+1 eps) [worker 0] episodes_seen=1370 last_return=-136.2 (+1 eps) [worker 1] episodes_seen=1350 last_return=-71.5 (+1 eps) [worker 3] episodes_seen=1360 last_return=-91.1 (+1 eps) [worker 0] episodes_seen=1380 last_return=46.7 (+1 eps) [worker 2] episodes_seen=1370 last_return=-66.2 (+1 eps) [worker 1] episodes_seen=1360 last_return=-91.6 (+1 eps) [worker 3] episodes_seen=1370 last_return=11.5 (+1 eps) [worker 0] episodes_seen=1390 last_return=-48.3 (+1 eps) [worker 2] episodes_seen=1380 last_return=-87.4 (+1 eps) [worker 1] episodes_seen=1370 last_return=-96.9 (+1 eps) [A2C][sync] it= 5005 steps= 600600 (+120) avg10=-109.10 loss=256.953 pg=-0.070 vf=467.325 H=0.292 gn=635.290 [worker 3] episodes_seen=1380 last_return=-108.9 (+1 eps) [worker 2] episodes_seen=1390 last_return=-83.0 (+1 eps) [worker 0] episodes_seen=1400 last_return=-85.3 (+1 eps) [worker 1] episodes_seen=1380 last_return=-115.4 (+1 eps) [worker 3] episodes_seen=1390 last_return=-86.7 (+1 eps) [worker 2] episodes_seen=1400 last_return=-138.6 (+1 eps) [worker 1] episodes_seen=1390 last_return=-118.2 (+1 eps) [worker 0] episodes_seen=1410 last_return=-98.0 (+1 eps) [worker 3] episodes_seen=1400 last_return=-88.3 (+1 eps) [worker 2] episodes_seen=1410 last_return=-70.5 (+1 eps) [worker 1] episodes_seen=1400 last_return=-52.3 (+1 eps) [worker 0] episodes_seen=1420 last_return=-158.3 (+1 eps) [worker 3] episodes_seen=1410 last_return=-108.7 (+1 eps) [worker 2] episodes_seen=1420 last_return=-91.3 (+1 eps) [worker 1] episodes_seen=1410 last_return=-42.9 (+1 eps) [worker 0] episodes_seen=1430 last_return=-73.3 (+1 eps) [worker 3] 
episodes_seen=1420 last_return=-88.8 (+1 eps) [worker 0] episodes_seen=1440 last_return=-143.6 (+1 eps) [worker 2] episodes_seen=1430 last_return=-77.2 (+1 eps) [worker 1] episodes_seen=1420 last_return=-114.7 (+1 eps) [worker 3] episodes_seen=1430 last_return=-105.3 (+1 eps) [worker 1] episodes_seen=1430 last_return=-80.2 (+1 eps) [worker 0] episodes_seen=1450 last_return=-86.6 (+1 eps) [worker 2] episodes_seen=1440 last_return=-114.9 (+1 eps) [worker 3] episodes_seen=1440 last_return=-100.5 (+1 eps) [worker 1] episodes_seen=1440 last_return=-102.3 (+1 eps) [worker 2] episodes_seen=1450 last_return=-104.2 (+1 eps) [worker 0] episodes_seen=1460 last_return=-112.5 (+1 eps) [worker 3] episodes_seen=1450 last_return=-130.1 (+1 eps) [worker 1] episodes_seen=1450 last_return=-76.3 (+1 eps) [worker 2] episodes_seen=1460 last_return=-59.7 (+1 eps) [worker 0] episodes_seen=1470 last_return=-53.1 (+1 eps) [worker 3] episodes_seen=1460 last_return=-107.2 (+1 eps) [worker 1] episodes_seen=1460 last_return=-32.2 (+1 eps) [worker 0] episodes_seen=1480 last_return=-63.8 (+1 eps) [worker 2] episodes_seen=1470 last_return=-91.6 (+1 eps) [worker 3] episodes_seen=1470 last_return=-84.2 (+1 eps) [worker 1] episodes_seen=1470 last_return=-41.4 (+1 eps) [worker 0] episodes_seen=1490 last_return=-71.3 (+1 eps) [worker 3] episodes_seen=1480 last_return=-16.4 (+1 eps) [worker 2] episodes_seen=1480 last_return=-104.1 (+1 eps) [A2C][sync] it= 5422 steps= 650640 (+120) avg10= -65.28 loss=92.210 pg=-0.077 vf=167.823 H=0.779 gn=517.029 [worker 1] episodes_seen=1480 last_return=-86.4 (+1 eps) [worker 2] episodes_seen=1490 last_return=-38.3 (+1 eps) [worker 3] episodes_seen=1490 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1500 last_return=-74.1 (+1 eps) [worker 1] episodes_seen=1490 last_return=-95.2 (+1 eps) [worker 3] episodes_seen=1500 last_return=-54.7 (+1 eps) [worker 2] episodes_seen=1500 last_return=-32.7 (+1 eps) [worker 0] episodes_seen=1510 last_return=-65.5 (+1 eps) [worker 
1] episodes_seen=1500 last_return=-87.7 (+1 eps) [worker 2] episodes_seen=1510 last_return=-45.6 (+1 eps) [worker 3] episodes_seen=1510 last_return=-87.5 (+1 eps) [worker 2] episodes_seen=1520 last_return=0.7 (+1 eps) [worker 0] episodes_seen=1520 last_return=-108.8 (+1 eps) [worker 3] episodes_seen=1520 last_return=-127.6 (+1 eps) [worker 1] episodes_seen=1510 last_return=-54.2 (+1 eps) [worker 2] episodes_seen=1530 last_return=-98.0 (+1 eps) [worker 0] episodes_seen=1530 last_return=-89.6 (+1 eps) [worker 1] episodes_seen=1520 last_return=-136.9 (+1 eps) [worker 3] episodes_seen=1530 last_return=-29.2 (+1 eps) [worker 2] episodes_seen=1540 last_return=-57.5 (+1 eps) [worker 0] episodes_seen=1540 last_return=-100.1 (+1 eps) [worker 3] episodes_seen=1540 last_return=-178.1 (+1 eps) [worker 1] episodes_seen=1530 last_return=-114.9 (+1 eps) [worker 0] episodes_seen=1550 last_return=-81.3 (+1 eps) [worker 2] episodes_seen=1550 last_return=-13.0 (+1 eps) [worker 3] episodes_seen=1550 last_return=-107.4 (+1 eps) [worker 1] episodes_seen=1540 last_return=-33.6 (+1 eps) [worker 0] episodes_seen=1560 last_return=-86.9 (+1 eps) [worker 2] episodes_seen=1560 last_return=-99.5 (+1 eps) [worker 3] episodes_seen=1560 last_return=-90.8 (+1 eps) [worker 1] episodes_seen=1550 last_return=-95.5 (+1 eps) [worker 0] episodes_seen=1570 last_return=-48.3 (+1 eps) [worker 3] episodes_seen=1570 last_return=-98.1 (+1 eps) [worker 0] episodes_seen=1580 last_return=-81.4 (+1 eps) [worker 1] episodes_seen=1560 last_return=-44.1 (+1 eps) [worker 2] episodes_seen=1570 last_return=-0.1 (+1 eps) [A2C][sync] it= 5839 steps= 700680 (+120) avg10= -58.12 loss=182.480 pg=0.059 vf=331.707 H=0.882 gn=718.813 [worker 3] episodes_seen=1580 last_return=-104.8 (+1 eps) [worker 0] episodes_seen=1590 last_return=27.8 (+1 eps) [worker 1] episodes_seen=1570 last_return=-77.5 (+1 eps) [worker 2] episodes_seen=1580 last_return=-42.1 (+1 eps) [worker 3] episodes_seen=1590 last_return=23.7 (+1 eps) [worker 1] 
episodes_seen=1580 last_return=-11.9 (+1 eps) [worker 0] episodes_seen=1600 last_return=-79.5 (+1 eps) [worker 3] episodes_seen=1600 last_return=-68.3 (+1 eps) [worker 2] episodes_seen=1590 last_return=-71.4 (+1 eps) [worker 1] episodes_seen=1590 last_return=-66.7 (+1 eps) [worker 0] episodes_seen=1610 last_return=-105.2 (+1 eps) [worker 3] episodes_seen=1610 last_return=-1.5 (+1 eps) [worker 2] episodes_seen=1600 last_return=-66.5 (+1 eps) [worker 0] episodes_seen=1620 last_return=-22.9 (+1 eps) [worker 1] episodes_seen=1600 last_return=-98.0 (+1 eps) [worker 0] episodes_seen=1630 last_return=-96.0 (+1 eps) [worker 2] episodes_seen=1610 last_return=-105.4 (+1 eps) [worker 3] episodes_seen=1620 last_return=-107.3 (+1 eps) [worker 1] episodes_seen=1610 last_return=-101.0 (+1 eps) [worker 2] episodes_seen=1620 last_return=-57.7 (+1 eps) [worker 0] episodes_seen=1640 last_return=-79.9 (+1 eps) [worker 3] episodes_seen=1630 last_return=-97.9 (+1 eps) [worker 1] episodes_seen=1620 last_return=-64.9 (+1 eps) [worker 0] episodes_seen=1650 last_return=-59.8 (+1 eps) [worker 3] episodes_seen=1640 last_return=-67.0 (+1 eps) [worker 1] episodes_seen=1630 last_return=-90.3 (+1 eps) [worker 2] episodes_seen=1630 last_return=-75.6 (+1 eps) [worker 0] episodes_seen=1660 last_return=-91.5 (+1 eps) [worker 1] episodes_seen=1640 last_return=-80.4 (+1 eps) [worker 3] episodes_seen=1650 last_return=-51.0 (+1 eps) [worker 2] episodes_seen=1640 last_return=-54.9 (+1 eps) [worker 0] episodes_seen=1670 last_return=-88.8 (+1 eps) [worker 2] episodes_seen=1650 last_return=-88.2 (+1 eps) [A2C][sync] it= 6256 steps= 750720 (+120) avg10= -59.67 loss=74.857 pg=0.010 vf=136.121 H=0.993 gn=594.682 [worker 0] episodes_seen=1680 last_return=-97.4 (+1 eps) [worker 3] episodes_seen=1660 last_return=-73.0 (+1 eps) [worker 2] episodes_seen=1660 last_return=-83.8 (+1 eps) [worker 1] episodes_seen=1650 last_return=-72.7 (+1 eps) [worker 0] episodes_seen=1690 last_return=-187.6 (+1 eps) [worker 2] 
episodes_seen=1670 last_return=-99.6 (+1 eps) [worker 1] episodes_seen=1660 last_return=-78.5 (+1 eps) [worker 3] episodes_seen=1670 last_return=-67.9 (+1 eps) [worker 0] episodes_seen=1700 last_return=-56.9 (+1 eps) [worker 1] episodes_seen=1670 last_return=-64.5 (+1 eps) [worker 2] episodes_seen=1680 last_return=-37.4 (+1 eps) [worker 0] episodes_seen=1710 last_return=-46.5 (+1 eps) [worker 3] episodes_seen=1680 last_return=-123.5 (+1 eps) [worker 1] episodes_seen=1680 last_return=-25.2 (+1 eps) [worker 0] episodes_seen=1720 last_return=-35.4 (+1 eps) [worker 2] episodes_seen=1690 last_return=-11.9 (+1 eps) [worker 3] episodes_seen=1690 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1730 last_return=-2.1 (+1 eps) [worker 1] episodes_seen=1690 last_return=6.3 (+1 eps) [worker 3] episodes_seen=1700 last_return=-12.4 (+1 eps) [worker 2] episodes_seen=1700 last_return=-34.2 (+1 eps) [worker 0] episodes_seen=1740 last_return=-153.1 (+1 eps) [worker 3] episodes_seen=1710 last_return=-124.1 (+1 eps) [worker 1] episodes_seen=1700 last_return=-204.7 (+1 eps) [worker 2] episodes_seen=1710 last_return=-110.2 (+1 eps) [A2C][sync] it= 6673 steps= 800760 (+120) avg10= -73.08 loss=65.285 pg=-0.164 vf=119.028 H=0.775 gn=707.353 [worker 1] episodes_seen=1710 last_return=-94.9 (+1 eps) [worker 0] episodes_seen=1750 last_return=-54.1 (+1 eps) [worker 3] episodes_seen=1720 last_return=-116.6 (+1 eps) [worker 1] episodes_seen=1720 last_return=-118.8 (+1 eps) [worker 0] episodes_seen=1760 last_return=-94.1 (+1 eps) [worker 2] episodes_seen=1720 last_return=-82.4 (+1 eps) [worker 1] episodes_seen=1730 last_return=-37.4 (+1 eps) [worker 0] episodes_seen=1770 last_return=-73.6 (+1 eps) [worker 3] episodes_seen=1730 last_return=-70.7 (+1 eps) [worker 2] episodes_seen=1730 last_return=-56.4 (+1 eps) [worker 3] episodes_seen=1740 last_return=-155.9 (+1 eps) [worker 2] episodes_seen=1740 last_return=-115.6 (+1 eps) [worker 1] episodes_seen=1740 last_return=-126.3 (+1 eps) [worker 0] 
episodes_seen=1780 last_return=-98.4 (+1 eps) [worker 3] episodes_seen=1750 last_return=-43.2 (+1 eps) [worker 2] episodes_seen=1750 last_return=-79.1 (+1 eps) [worker 3] episodes_seen=1760 last_return=-86.5 (+1 eps) [worker 2] episodes_seen=1760 last_return=-60.3 (+1 eps) [worker 1] episodes_seen=1750 last_return=0.5 (+1 eps) [worker 0] episodes_seen=1790 last_return=-97.0 (+1 eps) [worker 3] episodes_seen=1770 last_return=-76.3 (+1 eps) [worker 2] episodes_seen=1770 last_return=-98.9 (+1 eps) [worker 0] episodes_seen=1800 last_return=-69.6 (+1 eps) [worker 3] episodes_seen=1780 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1760 last_return=100.8 (+1 eps) [worker 0] episodes_seen=1810 last_return=-39.1 (+1 eps) [A2C][sync] it= 7090 steps= 850800 (+120) avg10= -53.57 loss=660.336 pg=0.021 vf=1200.607 H=0.927 gn=1060.233 [worker 2] episodes_seen=1780 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1770 last_return=-20.1 (+1 eps) [worker 3] episodes_seen=1790 last_return=-0.5 (+1 eps) [worker 0] episodes_seen=1820 last_return=-32.9 (+1 eps) [worker 2] episodes_seen=1790 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1780 last_return=-61.8 (+1 eps) [worker 0] episodes_seen=1830 last_return=-74.6 (+1 eps) [worker 3] episodes_seen=1800 last_return=-85.1 (+1 eps) [worker 2] episodes_seen=1800 last_return=-83.9 (+1 eps) [worker 1] episodes_seen=1790 last_return=-48.6 (+1 eps) [worker 3] episodes_seen=1810 last_return=-100.3 (+1 eps) [worker 0] episodes_seen=1840 last_return=100.0 (+1 eps) [worker 2] episodes_seen=1810 last_return=-270.9 (+1 eps) [worker 1] episodes_seen=1800 last_return=-78.2 (+1 eps) [worker 3] episodes_seen=1820 last_return=-106.1 (+1 eps) [worker 0] episodes_seen=1850 last_return=-81.9 (+1 eps) [worker 1] episodes_seen=1810 last_return=-40.9 (+1 eps) [worker 2] episodes_seen=1820 last_return=-63.7 (+1 eps) [worker 3] episodes_seen=1830 last_return=-100.0 (+1 eps) [A2C][sync] it= 7500 steps= 900000 (+120) avg10= -74.98 loss=31.065 
pg=-0.032 vf=56.582 H=1.166 gn=505.391 [Checkpoint] Saved self-describing checkpoint → part2_artifacts/checkpoints/a2c_run6_seed1227.pth [A2C][sync] done: steps=900000 time=801.0s avg10=-74.98
[Run run6_seed1227] checkpoint: part2_artifacts/checkpoints/a2c_run6_seed1227.pth [Run run6_seed1227] training plot (tail 500): part2_artifacts/train_curve_run6_seed1227.png [Run run6_seed1227] training plot (full): part2_artifacts/train_curve_full_run6_seed1227.png [Run run6_seed1227] per-worker plot: part2_artifacts/train_curve_workers_run6_seed1227.png [Run run6_seed1227] workers-average plot: part2_artifacts/train_curve_workers_avg_run6_seed1227.png
[Eval run6_seed1227] mean=-38.10 std=13.04 min=-67.11 max=-21.93 [Eval run6_seed1227] CSV: part2_artifacts/eval10_run6_seed1227.csv [Eval run6_seed1227] plot: part2_artifacts/eval10_run6_seed1227.png [Best] ep=7 return=-21.93 seed=1234
/usr/local/lib/python3.12/dist-packages/gymnasium/wrappers/rendering.py:293: UserWarning: WARN: Overwriting existing videos at /content/part2_artifacts/videos/run6_seed1227 folder (try specifying a different `video_folder` for the `RecordVideo` wrapper if this is not desired)
logger.warn(
[Video run6_seed1227] episode return=-21.93 [Video run6_seed1227] saved under: part2_artifacts/videos run6_seed1227 | mean=-38.1±13.0 | best_ep=7, best_ret=-21.9
Run#7
# Run 7 driver cell: train -> evaluate -> record video -> print a one-line summary.
# The run identifier embeds the global SEED and is passed to every helper below.
run_id = f"run7_seed{SEED}"

# All training hyperparameters for this run, gathered in one place so they are
# easy to compare against the other runs in this notebook.
run7_hparams = dict(
    n_workers=4,              # number of parallel rollout workers
    total_env_steps=900_000,  # training budget in environment steps
    T=30,                     # per-worker rollout length; the log shows 4 * 30 = 120 steps per sync
    gamma=0.99,
    entropy_coef=0.015,
    value_coef=0.55,
    max_grad_norm=0.5,
    lr=3e-4,
    log_every=40_000,         # the log shows a summary line roughly every 40k steps
)

# Multi-worker training; also saves the checkpoint and training plots.
model, logs, paths = train_once(run_id=run_id, **run7_hparams)

# Greedy evaluation over 10 fixed-seed episodes, using the checkpoint just saved.
metrics, eval_paths = evaluate_10(run_id, paths.ckpt_path)

# Re-run the best of those 10 evaluation episodes (by its seed) and record it.
video_dir, best_idx, best_ret = record_best_from_eval(run_id, paths.ckpt_path)

# Compact summary line: mean ± std over the evaluation, plus the best episode.
print(f"{run_id} | mean={metrics['mean']:.1f}±{metrics['std']:.1f} | best_ep={best_idx}, best_ret={best_ret:.1f}")
[Run run7_seed1227] starting training… [A2C][sync] start: workers=4, T=30, target_steps=900000, mp=fork [A2C][sync] it= 1 steps= 120 (+120) avg10= nan loss=290.329 pg=-0.000 vf=527.910 H=1.386 gn=47.596 [worker 3] episodes_seen=10 last_return=-194.8 (+1 eps) [worker 0] episodes_seen=10 last_return=-109.8 (+1 eps) [worker 2] episodes_seen=10 last_return=-150.4 (+1 eps) [worker 1] episodes_seen=10 last_return=-187.9 (+1 eps) [worker 3] episodes_seen=20 last_return=-93.2 (+1 eps) [worker 1] episodes_seen=20 last_return=-106.8 (+1 eps) [worker 0] episodes_seen=20 last_return=-126.0 (+1 eps) [worker 2] episodes_seen=20 last_return=-196.9 (+1 eps) [worker 3] episodes_seen=30 last_return=-231.2 (+1 eps) [worker 0] episodes_seen=30 last_return=-191.5 (+1 eps) [worker 2] episodes_seen=30 last_return=-208.1 (+1 eps) [worker 1] episodes_seen=30 last_return=-121.6 (+1 eps) [worker 3] episodes_seen=40 last_return=-81.2 (+1 eps) [worker 0] episodes_seen=40 last_return=-332.2 (+1 eps) [worker 2] episodes_seen=40 last_return=-168.5 (+1 eps) [worker 1] episodes_seen=40 last_return=-118.1 (+1 eps) [worker 3] episodes_seen=50 last_return=-185.2 (+1 eps) [worker 2] episodes_seen=50 last_return=-116.1 (+1 eps) [worker 1] episodes_seen=50 last_return=46.4 (+1 eps) [worker 0] episodes_seen=50 last_return=-216.8 (+1 eps) [worker 3] episodes_seen=60 last_return=-122.0 (+1 eps) [worker 2] episodes_seen=60 last_return=-176.6 (+1 eps) [worker 1] episodes_seen=60 last_return=-119.1 (+1 eps) [worker 0] episodes_seen=60 last_return=-158.3 (+1 eps) [worker 3] episodes_seen=70 last_return=-177.1 (+1 eps) [worker 1] episodes_seen=70 last_return=-306.1 (+1 eps) [worker 2] episodes_seen=70 last_return=-142.6 (+1 eps) [worker 0] episodes_seen=70 last_return=-145.4 (+1 eps) [worker 3] episodes_seen=80 last_return=-76.9 (+1 eps) [worker 1] episodes_seen=80 last_return=-97.1 (+1 eps) [worker 2] episodes_seen=80 last_return=-90.9 (+1 eps) [worker 0] episodes_seen=80 last_return=-104.3 (+1 eps) [worker 3] 
episodes_seen=90 last_return=-128.6 (+1 eps) [worker 1] episodes_seen=90 last_return=-252.0 (+1 eps) [worker 0] episodes_seen=90 last_return=-153.6 (+1 eps) [worker 2] episodes_seen=90 last_return=-91.2 (+1 eps) [worker 3] episodes_seen=100 last_return=-186.0 (+1 eps) [A2C][sync] it= 335 steps= 40200 (+120) avg10=-153.92 loss=660.523 pg=0.115 vf=1200.760 H=0.628 gn=1399.129 [worker 1] episodes_seen=100 last_return=-150.5 (+1 eps) [worker 2] episodes_seen=100 last_return=-230.6 (+1 eps) [worker 0] episodes_seen=100 last_return=-195.7 (+1 eps) [worker 3] episodes_seen=110 last_return=-288.3 (+1 eps) [worker 2] episodes_seen=110 last_return=-188.0 (+1 eps) [worker 1] episodes_seen=110 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=110 last_return=-247.0 (+1 eps) [worker 3] episodes_seen=120 last_return=-299.8 (+1 eps) [worker 2] episodes_seen=120 last_return=-284.8 (+1 eps) [worker 0] episodes_seen=120 last_return=-90.7 (+1 eps) [worker 1] episodes_seen=120 last_return=-186.4 (+1 eps) [worker 2] episodes_seen=130 last_return=-93.1 (+1 eps) [worker 1] episodes_seen=130 last_return=-69.7 (+1 eps) [worker 0] episodes_seen=130 last_return=-97.8 (+1 eps) [worker 3] episodes_seen=130 last_return=24.6 (+1 eps) [worker 2] episodes_seen=140 last_return=-91.1 (+1 eps) [worker 3] episodes_seen=140 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=140 last_return=-157.6 (+1 eps) [worker 1] episodes_seen=140 last_return=-79.4 (+1 eps) [worker 2] episodes_seen=150 last_return=-278.6 (+1 eps) [worker 3] episodes_seen=150 last_return=-102.9 (+1 eps) [worker 1] episodes_seen=150 last_return=-120.9 (+1 eps) [worker 0] episodes_seen=150 last_return=-196.5 (+1 eps) [worker 1] episodes_seen=160 last_return=-52.6 (+1 eps) [worker 3] episodes_seen=160 last_return=-101.2 (+1 eps) [worker 0] episodes_seen=160 last_return=-109.8 (+1 eps) [worker 2] episodes_seen=160 last_return=-54.4 (+1 eps) [worker 1] episodes_seen=170 last_return=-111.2 (+1 eps) [worker 3] episodes_seen=170 
last_return=-140.8 (+1 eps) [worker 0] episodes_seen=170 last_return=-47.6 (+1 eps) [worker 2] episodes_seen=170 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=180 last_return=-104.7 (+1 eps) [worker 2] episodes_seen=180 last_return=-153.0 (+1 eps) [worker 3] episodes_seen=180 last_return=-80.9 (+1 eps) [worker 0] episodes_seen=180 last_return=-91.4 (+1 eps) [worker 1] episodes_seen=190 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=190 last_return=-101.9 (+1 eps) [worker 3] episodes_seen=190 last_return=-181.9 (+1 eps) [A2C][sync] it= 669 steps= 80280 (+120) avg10=-192.79 loss=305.086 pg=0.002 vf=554.718 H=0.770 gn=483.014 [worker 0] episodes_seen=190 last_return=-86.0 (+1 eps) [worker 1] episodes_seen=200 last_return=-98.9 (+1 eps) [worker 2] episodes_seen=200 last_return=-122.8 (+1 eps) [worker 0] episodes_seen=200 last_return=-225.6 (+1 eps) [worker 3] episodes_seen=200 last_return=-109.3 (+1 eps) [worker 1] episodes_seen=210 last_return=-167.9 (+1 eps) [worker 2] episodes_seen=210 last_return=-275.6 (+1 eps) [worker 3] episodes_seen=210 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=210 last_return=-294.9 (+1 eps) [worker 1] episodes_seen=220 last_return=-194.7 (+1 eps) [worker 3] episodes_seen=220 last_return=-310.3 (+1 eps) [worker 2] episodes_seen=220 last_return=-365.9 (+1 eps) [worker 0] episodes_seen=220 last_return=-302.3 (+1 eps) [worker 1] episodes_seen=230 last_return=-279.0 (+1 eps) [worker 3] episodes_seen=230 last_return=-106.5 (+1 eps) [worker 2] episodes_seen=230 last_return=-272.4 (+1 eps) [worker 0] episodes_seen=230 last_return=-300.3 (+1 eps) [worker 3] episodes_seen=240 last_return=-320.3 (+1 eps) [worker 1] episodes_seen=240 last_return=-241.9 (+1 eps) [worker 2] episodes_seen=240 last_return=-155.0 (+1 eps) [worker 0] episodes_seen=240 last_return=-145.7 (+1 eps) [worker 1] episodes_seen=250 last_return=-108.5 (+1 eps) [worker 3] episodes_seen=250 last_return=-241.2 (+1 eps) [worker 2] episodes_seen=250 
last_return=-359.1 (+1 eps) [worker 0] episodes_seen=250 last_return=-298.5 (+1 eps) [worker 3] episodes_seen=260 last_return=-289.0 (+1 eps) [worker 1] episodes_seen=260 last_return=-312.4 (+1 eps) [worker 2] episodes_seen=260 last_return=-210.9 (+1 eps) [worker 0] episodes_seen=260 last_return=-267.5 (+1 eps) [worker 1] episodes_seen=270 last_return=-99.4 (+1 eps) [worker 3] episodes_seen=270 last_return=-57.6 (+1 eps) [A2C][sync] it= 1003 steps= 120360 (+120) avg10= -87.71 loss=707.869 pg=-0.367 vf=1287.717 H=0.573 gn=3390.202 [worker 2] episodes_seen=270 last_return=-91.9 (+1 eps) [worker 1] episodes_seen=280 last_return=-70.5 (+1 eps) [worker 3] episodes_seen=280 last_return=-52.0 (+1 eps) [worker 2] episodes_seen=280 last_return=-97.6 (+1 eps) [worker 0] episodes_seen=270 last_return=-0.9 (+1 eps) [worker 1] episodes_seen=290 last_return=-84.0 (+1 eps) [worker 3] episodes_seen=290 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=290 last_return=-90.8 (+1 eps) [worker 0] episodes_seen=280 last_return=-128.5 (+1 eps) [worker 1] episodes_seen=300 last_return=-106.3 (+1 eps) [worker 3] episodes_seen=300 last_return=-101.4 (+1 eps) [worker 0] episodes_seen=290 last_return=-158.9 (+1 eps) [worker 2] episodes_seen=300 last_return=-163.1 (+1 eps) [worker 1] episodes_seen=310 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=310 last_return=-141.6 (+1 eps) [worker 0] episodes_seen=300 last_return=-189.6 (+1 eps) [worker 2] episodes_seen=310 last_return=-259.3 (+1 eps) [worker 3] episodes_seen=320 last_return=-433.2 (+1 eps) [worker 1] episodes_seen=320 last_return=19.7 (+1 eps) [worker 2] episodes_seen=320 last_return=-99.2 (+1 eps) [worker 0] episodes_seen=310 last_return=-193.5 (+1 eps) [worker 1] episodes_seen=330 last_return=-141.6 (+1 eps) [worker 3] episodes_seen=330 last_return=-143.4 (+1 eps) [worker 2] episodes_seen=330 last_return=-142.9 (+1 eps) [worker 0] episodes_seen=320 last_return=-233.4 (+1 eps) [worker 1] episodes_seen=340 last_return=-183.5 
(+1 eps) [worker 3] episodes_seen=340 last_return=-124.5 (+1 eps) [worker 2] episodes_seen=340 last_return=-103.3 (+1 eps) [worker 0] episodes_seen=330 last_return=-141.1 (+1 eps) [worker 1] episodes_seen=350 last_return=-255.9 (+1 eps) [worker 3] episodes_seen=350 last_return=-210.4 (+1 eps) [worker 0] episodes_seen=340 last_return=-72.4 (+1 eps) [worker 2] episodes_seen=350 last_return=-147.1 (+1 eps) [worker 3] episodes_seen=360 last_return=-349.9 (+1 eps) [worker 1] episodes_seen=360 last_return=-224.4 (+1 eps) [worker 0] episodes_seen=350 last_return=-81.2 (+1 eps) [worker 2] episodes_seen=360 last_return=-236.1 (+1 eps) [worker 3] episodes_seen=370 last_return=-141.1 (+1 eps) [worker 1] episodes_seen=370 last_return=-109.2 (+1 eps) [worker 0] episodes_seen=360 last_return=-77.2 (+1 eps) [worker 2] episodes_seen=370 last_return=-28.1 (+1 eps) [worker 3] episodes_seen=380 last_return=-74.7 (+1 eps) [worker 1] episodes_seen=380 last_return=-106.9 (+1 eps) [worker 2] episodes_seen=380 last_return=-133.8 (+1 eps) [worker 0] episodes_seen=370 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=390 last_return=-81.7 (+1 eps) [worker 1] episodes_seen=390 last_return=-79.7 (+1 eps) [worker 0] episodes_seen=380 last_return=-118.9 (+1 eps) [A2C][sync] it= 1337 steps= 160440 (+120) avg10=-119.18 loss=276.783 pg=-0.031 vf=503.334 H=1.306 gn=1134.962 [worker 3] episodes_seen=400 last_return=-110.0 (+1 eps) [worker 2] episodes_seen=390 last_return=-152.5 (+1 eps) [worker 1] episodes_seen=400 last_return=-141.0 (+1 eps) [worker 0] episodes_seen=390 last_return=-303.7 (+1 eps) [worker 3] episodes_seen=410 last_return=-132.4 (+1 eps) [worker 2] episodes_seen=400 last_return=-262.6 (+1 eps) [worker 1] episodes_seen=410 last_return=-350.0 (+1 eps) [worker 0] episodes_seen=400 last_return=-286.2 (+1 eps) [worker 3] episodes_seen=420 last_return=-208.3 (+1 eps) [worker 2] episodes_seen=410 last_return=-191.3 (+1 eps) [worker 1] episodes_seen=420 last_return=-334.7 (+1 eps) 
[worker 0] episodes_seen=410 last_return=-296.4 (+1 eps) [worker 3] episodes_seen=430 last_return=-286.7 (+1 eps) [worker 2] episodes_seen=420 last_return=-105.5 (+1 eps) [worker 1] episodes_seen=430 last_return=-117.4 (+1 eps) [worker 0] episodes_seen=420 last_return=-57.3 (+1 eps) [worker 2] episodes_seen=430 last_return=-65.7 (+1 eps) [worker 3] episodes_seen=440 last_return=-144.2 (+1 eps) [worker 1] episodes_seen=440 last_return=-92.5 (+1 eps) [worker 0] episodes_seen=430 last_return=-222.7 (+1 eps) [worker 2] episodes_seen=440 last_return=-309.6 (+1 eps) [worker 3] episodes_seen=450 last_return=-133.9 (+1 eps) [worker 1] episodes_seen=450 last_return=-19.6 (+1 eps) [worker 0] episodes_seen=440 last_return=-218.2 (+1 eps) [worker 2] episodes_seen=450 last_return=-189.6 (+1 eps) [worker 3] episodes_seen=460 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=460 last_return=-187.8 (+1 eps) [worker 0] episodes_seen=450 last_return=-278.1 (+1 eps) [worker 2] episodes_seen=460 last_return=-144.5 (+1 eps) [worker 3] episodes_seen=470 last_return=-151.2 (+1 eps) [worker 1] episodes_seen=470 last_return=-272.4 (+1 eps) [worker 0] episodes_seen=460 last_return=-282.8 (+1 eps) [worker 2] episodes_seen=470 last_return=-84.8 (+1 eps) [worker 1] episodes_seen=480 last_return=-168.1 (+1 eps) [worker 3] episodes_seen=480 last_return=-435.3 (+1 eps) [worker 2] episodes_seen=480 last_return=-145.1 (+1 eps) [worker 0] episodes_seen=470 last_return=21.6 (+1 eps) [worker 1] episodes_seen=490 last_return=-158.4 (+1 eps) [worker 3] episodes_seen=490 last_return=-113.2 (+1 eps) [worker 0] episodes_seen=480 last_return=-128.6 (+1 eps) [worker 2] episodes_seen=490 last_return=-211.9 (+1 eps) [worker 1] episodes_seen=500 last_return=-269.0 (+1 eps) [worker 3] episodes_seen=500 last_return=-122.2 (+1 eps) [worker 2] episodes_seen=500 last_return=-97.8 (+1 eps) [worker 0] episodes_seen=490 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=510 last_return=-67.4 (+1 eps) [worker 3] 
episodes_seen=510 last_return=-132.3 (+1 eps) [worker 2] episodes_seen=510 last_return=-175.3 (+1 eps) [worker 0] episodes_seen=500 last_return=-141.8 (+1 eps) [A2C][sync] it= 1671 steps= 200520 (+120) avg10=-253.27 loss=7034.245 pg=-0.098 vf=12789.728 H=0.507 gn=10260.268 [worker 1] episodes_seen=520 last_return=-205.6 (+1 eps) [worker 3] episodes_seen=520 last_return=-168.1 (+1 eps) [worker 2] episodes_seen=520 last_return=-274.8 (+1 eps) [worker 0] episodes_seen=510 last_return=-100.4 (+1 eps) [worker 1] episodes_seen=530 last_return=-90.4 (+1 eps) [worker 3] episodes_seen=530 last_return=-27.4 (+1 eps) [worker 2] episodes_seen=530 last_return=-93.3 (+1 eps) [worker 0] episodes_seen=520 last_return=-88.6 (+1 eps) [worker 1] episodes_seen=540 last_return=-83.5 (+1 eps) [worker 3] episodes_seen=540 last_return=-85.8 (+1 eps) [worker 2] episodes_seen=540 last_return=-180.8 (+1 eps) [worker 1] episodes_seen=550 last_return=-83.1 (+1 eps) [worker 0] episodes_seen=530 last_return=-77.0 (+1 eps) [worker 3] episodes_seen=550 last_return=-83.5 (+1 eps) [worker 2] episodes_seen=550 last_return=-93.2 (+1 eps) [worker 1] episodes_seen=560 last_return=-174.4 (+1 eps) [worker 0] episodes_seen=540 last_return=-184.0 (+1 eps) [worker 3] episodes_seen=560 last_return=-81.7 (+1 eps) [worker 1] episodes_seen=570 last_return=-48.9 (+1 eps) [worker 2] episodes_seen=560 last_return=-62.5 (+1 eps) [worker 0] episodes_seen=550 last_return=-220.8 (+1 eps) [worker 3] episodes_seen=570 last_return=-188.1 (+1 eps) [worker 1] episodes_seen=580 last_return=-75.6 (+1 eps) [worker 0] episodes_seen=560 last_return=-87.4 (+1 eps) [worker 2] episodes_seen=570 last_return=-124.3 (+1 eps) [worker 3] episodes_seen=580 last_return=-74.3 (+1 eps) [worker 1] episodes_seen=590 last_return=-118.0 (+1 eps) [worker 0] episodes_seen=570 last_return=-159.3 (+1 eps) [worker 2] episodes_seen=580 last_return=-81.2 (+1 eps) [worker 3] episodes_seen=590 last_return=-99.1 (+1 eps) [worker 1] episodes_seen=600 
last_return=-84.1 (+1 eps) [worker 0] episodes_seen=580 last_return=-41.4 (+1 eps) [worker 2] episodes_seen=590 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=600 last_return=-68.5 (+1 eps) [worker 0] episodes_seen=590 last_return=-107.9 (+1 eps) [worker 1] episodes_seen=610 last_return=-124.5 (+1 eps) [worker 2] episodes_seen=600 last_return=-213.9 (+1 eps) [worker 3] episodes_seen=610 last_return=-190.7 (+1 eps) [worker 1] episodes_seen=620 last_return=-107.7 (+1 eps) [worker 0] episodes_seen=600 last_return=-98.2 (+1 eps) [worker 2] episodes_seen=610 last_return=-118.0 (+1 eps) [worker 3] episodes_seen=620 last_return=-88.4 (+1 eps) [worker 1] episodes_seen=630 last_return=-97.1 (+1 eps) [worker 0] episodes_seen=610 last_return=-72.6 (+1 eps) [worker 2] episodes_seen=620 last_return=-65.8 (+1 eps) [worker 3] episodes_seen=630 last_return=-192.2 (+1 eps) [A2C][sync] it= 2005 steps= 240600 (+120) avg10=-193.96 loss=2312.157 pg=0.273 vf=4203.449 H=0.843 gn=4952.999 [worker 0] episodes_seen=620 last_return=-320.4 (+1 eps) [worker 1] episodes_seen=640 last_return=-118.1 (+1 eps) [worker 2] episodes_seen=630 last_return=-266.1 (+1 eps) [worker 3] episodes_seen=640 last_return=-90.8 (+1 eps) [worker 0] episodes_seen=630 last_return=-63.9 (+1 eps) [worker 1] episodes_seen=650 last_return=-239.2 (+1 eps) [worker 2] episodes_seen=640 last_return=-104.8 (+1 eps) [worker 3] episodes_seen=650 last_return=-76.4 (+1 eps) [worker 1] episodes_seen=660 last_return=-292.2 (+1 eps) [worker 0] episodes_seen=640 last_return=-159.6 (+1 eps) [worker 2] episodes_seen=650 last_return=-113.9 (+1 eps) [worker 3] episodes_seen=660 last_return=-196.3 (+1 eps) [worker 1] episodes_seen=670 last_return=-239.7 (+1 eps) [worker 2] episodes_seen=660 last_return=-149.4 (+1 eps) [worker 0] episodes_seen=650 last_return=-38.4 (+1 eps) [worker 3] episodes_seen=670 last_return=-382.6 (+1 eps) [worker 2] episodes_seen=670 last_return=-422.3 (+1 eps) [worker 0] episodes_seen=660 last_return=-127.2 
(+1 eps) [worker 1] episodes_seen=680 last_return=-364.0 (+1 eps) [worker 3] episodes_seen=680 last_return=-317.8 (+1 eps) [worker 2] episodes_seen=680 last_return=-154.1 (+1 eps) [worker 0] episodes_seen=670 last_return=-17.8 (+1 eps) [worker 1] episodes_seen=690 last_return=-247.9 (+1 eps) [worker 3] episodes_seen=690 last_return=-319.6 (+1 eps) [worker 2] episodes_seen=690 last_return=-417.6 (+1 eps) [worker 0] episodes_seen=680 last_return=-114.0 (+1 eps) [worker 1] episodes_seen=700 last_return=-362.9 (+1 eps) [worker 3] episodes_seen=700 last_return=-80.0 (+1 eps) [worker 2] episodes_seen=700 last_return=-156.2 (+1 eps) [worker 0] episodes_seen=690 last_return=-170.2 (+1 eps) [worker 1] episodes_seen=710 last_return=-82.8 (+1 eps) [worker 3] episodes_seen=710 last_return=-277.9 (+1 eps) [worker 2] episodes_seen=710 last_return=-284.9 (+1 eps) [worker 1] episodes_seen=720 last_return=-226.4 (+1 eps) [worker 0] episodes_seen=700 last_return=-125.5 (+1 eps) [worker 3] episodes_seen=720 last_return=-195.2 (+1 eps) [worker 2] episodes_seen=720 last_return=-249.8 (+1 eps) [worker 0] episodes_seen=710 last_return=-103.6 (+1 eps) [worker 1] episodes_seen=730 last_return=-128.0 (+1 eps) [worker 3] episodes_seen=730 last_return=-87.4 (+1 eps) [worker 2] episodes_seen=730 last_return=-56.4 (+1 eps) [worker 0] episodes_seen=720 last_return=-48.5 (+1 eps) [worker 1] episodes_seen=740 last_return=-263.1 (+1 eps) [worker 3] episodes_seen=740 last_return=-70.8 (+1 eps) [worker 2] episodes_seen=740 last_return=-282.3 (+1 eps) [worker 0] episodes_seen=730 last_return=-188.1 (+1 eps) [worker 1] episodes_seen=750 last_return=-339.3 (+1 eps) [worker 3] episodes_seen=750 last_return=-113.0 (+1 eps) [worker 2] episodes_seen=750 last_return=-124.2 (+1 eps) [worker 0] episodes_seen=740 last_return=-97.4 (+1 eps) [worker 1] episodes_seen=760 last_return=-72.4 (+1 eps) [worker 3] episodes_seen=760 last_return=-148.1 (+1 eps) [worker 2] episodes_seen=760 last_return=-259.1 (+1 eps) 
[worker 0] episodes_seen=750 last_return=-316.8 (+1 eps) [worker 1] episodes_seen=770 last_return=-280.6 (+1 eps) [A2C][sync] it= 2339 steps= 280680 (+120) avg10=-263.82 loss=3815.908 pg=0.081 vf=6937.881 H=0.468 gn=11387.285 [worker 3] episodes_seen=770 last_return=-69.0 (+1 eps) [worker 2] episodes_seen=770 last_return=-273.5 (+1 eps) [worker 0] episodes_seen=760 last_return=-172.9 (+1 eps) [worker 1] episodes_seen=780 last_return=-121.8 (+1 eps) [worker 3] episodes_seen=780 last_return=-84.6 (+1 eps) [worker 2] episodes_seen=780 last_return=-83.7 (+1 eps) [worker 0] episodes_seen=770 last_return=-50.3 (+1 eps) [worker 1] episodes_seen=790 last_return=-129.1 (+1 eps) [worker 3] episodes_seen=790 last_return=-101.3 (+1 eps) [worker 2] episodes_seen=790 last_return=-60.7 (+1 eps) [worker 0] episodes_seen=780 last_return=-143.4 (+1 eps) [worker 1] episodes_seen=800 last_return=-63.5 (+1 eps) [worker 3] episodes_seen=800 last_return=-182.4 (+1 eps) [worker 2] episodes_seen=800 last_return=-73.1 (+1 eps) [worker 0] episodes_seen=790 last_return=-47.0 (+1 eps) [worker 1] episodes_seen=810 last_return=-105.7 (+1 eps) [worker 3] episodes_seen=810 last_return=-30.5 (+1 eps) [worker 2] episodes_seen=810 last_return=-92.8 (+1 eps) [worker 0] episodes_seen=800 last_return=-106.4 (+1 eps) [worker 1] episodes_seen=820 last_return=-72.4 (+1 eps) [worker 3] episodes_seen=820 last_return=-101.9 (+1 eps) [worker 2] episodes_seen=820 last_return=-101.0 (+1 eps) [worker 0] episodes_seen=810 last_return=-109.3 (+1 eps) [worker 1] episodes_seen=830 last_return=-81.1 (+1 eps) [worker 3] episodes_seen=830 last_return=-240.7 (+1 eps) [worker 2] episodes_seen=830 last_return=-81.2 (+1 eps) [worker 0] episodes_seen=820 last_return=-117.5 (+1 eps) [worker 1] episodes_seen=840 last_return=-271.6 (+1 eps) [worker 3] episodes_seen=840 last_return=-79.8 (+1 eps) [worker 2] episodes_seen=840 last_return=-75.8 (+1 eps) [worker 0] episodes_seen=830 last_return=-90.2 (+1 eps) [worker 1] 
episodes_seen=850 last_return=-72.7 (+1 eps) [worker 3] episodes_seen=850 last_return=-94.7 (+1 eps) [worker 2] episodes_seen=850 last_return=-149.6 (+1 eps) [worker 0] episodes_seen=840 last_return=-85.1 (+1 eps) [worker 1] episodes_seen=860 last_return=-75.2 (+1 eps) [worker 3] episodes_seen=860 last_return=-222.3 (+1 eps) [worker 2] episodes_seen=860 last_return=-346.5 (+1 eps) [worker 0] episodes_seen=850 last_return=-105.6 (+1 eps) [worker 3] episodes_seen=870 last_return=-112.8 (+1 eps) [worker 1] episodes_seen=870 last_return=-98.4 (+1 eps) [worker 0] episodes_seen=860 last_return=-314.0 (+1 eps) [worker 2] episodes_seen=870 last_return=-241.2 (+1 eps) [worker 3] episodes_seen=880 last_return=-83.8 (+1 eps) [A2C][sync] it= 2673 steps= 320760 (+120) avg10=-127.11 loss=5232.430 pg=-0.002 vf=9513.544 H=1.173 gn=5768.753 [worker 1] episodes_seen=880 last_return=-138.0 (+1 eps) [worker 0] episodes_seen=870 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=880 last_return=-55.2 (+1 eps) [worker 3] episodes_seen=890 last_return=-246.7 (+1 eps) [worker 1] episodes_seen=890 last_return=-262.3 (+1 eps) [worker 2] episodes_seen=890 last_return=-207.1 (+1 eps) [worker 0] episodes_seen=880 last_return=-121.0 (+1 eps) [worker 3] episodes_seen=900 last_return=-103.8 (+1 eps) [worker 1] episodes_seen=900 last_return=-158.1 (+1 eps) [worker 0] episodes_seen=890 last_return=-341.3 (+1 eps) [worker 2] episodes_seen=900 last_return=-365.4 (+1 eps) [worker 3] episodes_seen=910 last_return=-114.3 (+1 eps) [worker 1] episodes_seen=910 last_return=-256.6 (+1 eps) [worker 2] episodes_seen=910 last_return=-223.9 (+1 eps) [worker 0] episodes_seen=900 last_return=-486.2 (+1 eps) [worker 3] episodes_seen=920 last_return=-322.6 (+1 eps) [worker 1] episodes_seen=920 last_return=-335.6 (+1 eps) [worker 2] episodes_seen=920 last_return=-318.2 (+1 eps) [worker 0] episodes_seen=910 last_return=-250.7 (+1 eps) [worker 3] episodes_seen=930 last_return=-200.6 (+1 eps) [worker 1] 
episodes_seen=930 last_return=-135.6 (+1 eps) [worker 2] episodes_seen=930 last_return=-160.8 (+1 eps) [worker 0] episodes_seen=920 last_return=-190.4 (+1 eps) [worker 3] episodes_seen=940 last_return=-110.8 (+1 eps) [worker 1] episodes_seen=940 last_return=-133.3 (+1 eps) [worker 2] episodes_seen=940 last_return=-303.9 (+1 eps) [worker 0] episodes_seen=930 last_return=-190.9 (+1 eps) [worker 3] episodes_seen=950 last_return=-110.9 (+1 eps) [worker 1] episodes_seen=950 last_return=-19.2 (+1 eps) [worker 2] episodes_seen=950 last_return=-113.3 (+1 eps) [worker 0] episodes_seen=940 last_return=-240.9 (+1 eps) [worker 3] episodes_seen=960 last_return=-178.2 (+1 eps) [worker 1] episodes_seen=960 last_return=-122.7 (+1 eps) [worker 2] episodes_seen=960 last_return=-225.4 (+1 eps) [worker 0] episodes_seen=950 last_return=-144.8 (+1 eps) [worker 3] episodes_seen=970 last_return=-254.6 (+1 eps) [worker 1] episodes_seen=970 last_return=-138.0 (+1 eps) [worker 2] episodes_seen=970 last_return=-263.3 (+1 eps) [worker 0] episodes_seen=960 last_return=-153.6 (+1 eps) [worker 3] episodes_seen=980 last_return=-106.4 (+1 eps) [worker 2] episodes_seen=980 last_return=-117.3 (+1 eps) [worker 1] episodes_seen=980 last_return=-109.4 (+1 eps) [worker 0] episodes_seen=970 last_return=-97.8 (+1 eps) [worker 3] episodes_seen=990 last_return=-127.5 (+1 eps) [worker 2] episodes_seen=990 last_return=-101.8 (+1 eps) [worker 1] episodes_seen=990 last_return=-162.1 (+1 eps) [worker 0] episodes_seen=980 last_return=-72.1 (+1 eps) [worker 3] episodes_seen=1000 last_return=-165.9 (+1 eps) [worker 2] episodes_seen=1000 last_return=-122.8 (+1 eps) [A2C][sync] it= 3007 steps= 360840 (+120) avg10=-232.72 loss=5799.040 pg=-0.058 vf=10543.829 H=0.526 gn=18431.703 [worker 1] episodes_seen=1000 last_return=-317.3 (+1 eps) [worker 0] episodes_seen=990 last_return=-120.0 (+1 eps) [worker 3] episodes_seen=1010 last_return=-234.9 (+1 eps) [worker 2] episodes_seen=1010 last_return=-221.7 (+1 eps) [worker 1] 
episodes_seen=1010 last_return=-294.2 (+1 eps) [worker 0] episodes_seen=1000 last_return=-239.1 (+1 eps) [worker 3] episodes_seen=1020 last_return=-131.0 (+1 eps) [worker 1] episodes_seen=1020 last_return=-417.8 (+1 eps) [worker 0] episodes_seen=1010 last_return=-356.1 (+1 eps) [worker 3] episodes_seen=1030 last_return=-137.2 (+1 eps) [worker 2] episodes_seen=1020 last_return=-239.6 (+1 eps) [worker 1] episodes_seen=1030 last_return=-162.6 (+1 eps) [worker 0] episodes_seen=1020 last_return=-65.0 (+1 eps) [worker 3] episodes_seen=1040 last_return=-102.9 (+1 eps) [worker 1] episodes_seen=1040 last_return=-123.4 (+1 eps) [worker 2] episodes_seen=1030 last_return=-69.1 (+1 eps) [worker 0] episodes_seen=1030 last_return=-150.5 (+1 eps) [worker 3] episodes_seen=1050 last_return=-315.7 (+1 eps) [worker 2] episodes_seen=1040 last_return=-317.7 (+1 eps) [worker 1] episodes_seen=1050 last_return=-159.3 (+1 eps) [worker 0] episodes_seen=1040 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=1060 last_return=-123.5 (+1 eps) [worker 2] episodes_seen=1050 last_return=-237.5 (+1 eps) [worker 1] episodes_seen=1060 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1050 last_return=-117.4 (+1 eps) [worker 3] episodes_seen=1070 last_return=-34.0 (+1 eps) [worker 2] episodes_seen=1060 last_return=-107.1 (+1 eps) [worker 1] episodes_seen=1070 last_return=-98.1 (+1 eps) [worker 0] episodes_seen=1060 last_return=-177.0 (+1 eps) [worker 3] episodes_seen=1080 last_return=-129.4 (+1 eps) [worker 2] episodes_seen=1070 last_return=-107.7 (+1 eps) [worker 1] episodes_seen=1080 last_return=-162.3 (+1 eps) [worker 0] episodes_seen=1070 last_return=-94.1 (+1 eps) [worker 3] episodes_seen=1090 last_return=-78.7 (+1 eps) [worker 2] episodes_seen=1080 last_return=-90.1 (+1 eps) [worker 1] episodes_seen=1090 last_return=-95.5 (+1 eps) [worker 0] episodes_seen=1080 last_return=-95.7 (+1 eps) [worker 3] episodes_seen=1100 last_return=-87.6 (+1 eps) [worker 1] episodes_seen=1100 last_return=-77.6 
(+1 eps) [worker 2] episodes_seen=1090 last_return=-89.8 (+1 eps) [worker 3] episodes_seen=1110 last_return=-109.9 (+1 eps) [worker 0] episodes_seen=1090 last_return=-101.6 (+1 eps) [worker 1] episodes_seen=1110 last_return=-51.3 (+1 eps) [worker 2] episodes_seen=1100 last_return=-85.2 (+1 eps) [worker 3] episodes_seen=1120 last_return=-104.1 (+1 eps) [worker 0] episodes_seen=1100 last_return=-94.6 (+1 eps) [worker 1] episodes_seen=1120 last_return=-74.0 (+1 eps) [worker 2] episodes_seen=1110 last_return=-117.1 (+1 eps) [A2C][sync] it= 3341 steps= 400920 (+120) avg10= -92.15 loss=57.412 pg=-0.070 vf=104.546 H=1.234 gn=187.244 [worker 3] episodes_seen=1130 last_return=-88.2 (+1 eps) [worker 0] episodes_seen=1110 last_return=-150.4 (+1 eps) [worker 1] episodes_seen=1130 last_return=-197.7 (+1 eps) [worker 2] episodes_seen=1120 last_return=-59.9 (+1 eps) [worker 3] episodes_seen=1140 last_return=-374.2 (+1 eps) [worker 0] episodes_seen=1120 last_return=-277.5 (+1 eps) [worker 1] episodes_seen=1140 last_return=-230.6 (+1 eps) [worker 2] episodes_seen=1130 last_return=-133.1 (+1 eps) [worker 3] episodes_seen=1150 last_return=-165.1 (+1 eps) [worker 0] episodes_seen=1130 last_return=-333.7 (+1 eps) [worker 1] episodes_seen=1150 last_return=-273.6 (+1 eps) [worker 2] episodes_seen=1140 last_return=-125.9 (+1 eps) [worker 3] episodes_seen=1160 last_return=-135.1 (+1 eps) [worker 0] episodes_seen=1140 last_return=-155.6 (+1 eps) [worker 1] episodes_seen=1160 last_return=-93.0 (+1 eps) [worker 2] episodes_seen=1150 last_return=-198.3 (+1 eps) [worker 3] episodes_seen=1170 last_return=-61.9 (+1 eps) [worker 0] episodes_seen=1150 last_return=-219.9 (+1 eps) [worker 1] episodes_seen=1170 last_return=-134.3 (+1 eps) [worker 2] episodes_seen=1160 last_return=-68.5 (+1 eps) [worker 3] episodes_seen=1180 last_return=-142.5 (+1 eps) [worker 0] episodes_seen=1160 last_return=-85.7 (+1 eps) [worker 1] episodes_seen=1180 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1170 
last_return=-296.2 (+1 eps) [worker 3] episodes_seen=1190 last_return=-83.3 (+1 eps) [worker 0] episodes_seen=1170 last_return=-126.0 (+1 eps) [worker 1] episodes_seen=1190 last_return=-72.0 (+1 eps) [worker 2] episodes_seen=1180 last_return=90.8 (+1 eps) [worker 3] episodes_seen=1200 last_return=-97.3 (+1 eps) [worker 0] episodes_seen=1180 last_return=-79.8 (+1 eps) [worker 1] episodes_seen=1200 last_return=17.1 (+1 eps) [worker 2] episodes_seen=1190 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=1210 last_return=-87.2 (+1 eps) [worker 0] episodes_seen=1190 last_return=-121.7 (+1 eps) [worker 1] episodes_seen=1210 last_return=-230.8 (+1 eps) [worker 2] episodes_seen=1200 last_return=24.3 (+1 eps) [worker 3] episodes_seen=1220 last_return=-98.9 (+1 eps) [worker 0] episodes_seen=1200 last_return=-38.0 (+1 eps) [worker 1] episodes_seen=1220 last_return=-103.2 (+1 eps) [worker 2] episodes_seen=1210 last_return=-82.0 (+1 eps) [worker 3] episodes_seen=1230 last_return=-100.8 (+1 eps) [worker 0] episodes_seen=1210 last_return=-78.4 (+1 eps) [worker 1] episodes_seen=1230 last_return=-86.9 (+1 eps) [worker 2] episodes_seen=1220 last_return=-138.3 (+1 eps) [worker 3] episodes_seen=1240 last_return=-131.9 (+1 eps) [worker 0] episodes_seen=1220 last_return=-245.0 (+1 eps) [worker 1] episodes_seen=1240 last_return=-392.4 (+1 eps) [worker 2] episodes_seen=1230 last_return=-153.9 (+1 eps) [A2C][sync] it= 3675 steps= 441000 (+120) avg10=-172.11 loss=6555.458 pg=-0.002 vf=11919.019 H=0.040 gn=11048.626 [worker 3] episodes_seen=1250 last_return=-339.6 (+1 eps) [worker 0] episodes_seen=1230 last_return=-231.6 (+1 eps) [worker 1] episodes_seen=1250 last_return=-142.0 (+1 eps) [worker 2] episodes_seen=1240 last_return=-111.4 (+1 eps) [worker 3] episodes_seen=1260 last_return=-87.6 (+1 eps) [worker 0] episodes_seen=1240 last_return=-209.4 (+1 eps) [worker 1] episodes_seen=1260 last_return=-265.6 (+1 eps) [worker 2] episodes_seen=1250 last_return=-354.0 (+1 eps) [worker 3] 
episodes_seen=1270 last_return=-269.7 (+1 eps) [worker 0] episodes_seen=1250 last_return=-254.5 (+1 eps) [worker 1] episodes_seen=1270 last_return=-347.8 (+1 eps) [worker 2] episodes_seen=1260 last_return=-248.7 (+1 eps) [worker 3] episodes_seen=1280 last_return=-140.1 (+1 eps) [worker 0] episodes_seen=1260 last_return=-411.7 (+1 eps) [worker 1] episodes_seen=1280 last_return=-254.5 (+1 eps) [worker 2] episodes_seen=1270 last_return=-347.1 (+1 eps) [worker 3] episodes_seen=1290 last_return=-286.5 (+1 eps) [worker 1] episodes_seen=1290 last_return=-194.7 (+1 eps) [worker 0] episodes_seen=1270 last_return=-264.4 (+1 eps) [worker 2] episodes_seen=1280 last_return=-27.2 (+1 eps) [worker 3] episodes_seen=1300 last_return=-220.6 (+1 eps) [worker 1] episodes_seen=1300 last_return=-240.5 (+1 eps) [worker 0] episodes_seen=1280 last_return=-266.1 (+1 eps) [worker 2] episodes_seen=1290 last_return=-288.2 (+1 eps) [worker 3] episodes_seen=1310 last_return=-216.1 (+1 eps) [worker 1] episodes_seen=1310 last_return=-311.6 (+1 eps) [worker 0] episodes_seen=1290 last_return=-323.5 (+1 eps) [worker 2] episodes_seen=1300 last_return=-251.6 (+1 eps) [worker 3] episodes_seen=1320 last_return=-199.7 (+1 eps) [worker 1] episodes_seen=1320 last_return=-217.2 (+1 eps) [worker 0] episodes_seen=1300 last_return=-398.9 (+1 eps) [worker 2] episodes_seen=1310 last_return=-439.8 (+1 eps) [worker 1] episodes_seen=1330 last_return=-172.9 (+1 eps) [worker 3] episodes_seen=1330 last_return=-128.1 (+1 eps) [worker 0] episodes_seen=1310 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1320 last_return=-209.4 (+1 eps) [worker 3] episodes_seen=1340 last_return=-180.9 (+1 eps) [worker 1] episodes_seen=1340 last_return=-440.7 (+1 eps) [worker 0] episodes_seen=1320 last_return=-422.4 (+1 eps) [worker 2] episodes_seen=1330 last_return=-227.9 (+1 eps) [worker 3] episodes_seen=1350 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1350 last_return=-431.3 (+1 eps) [worker 0] episodes_seen=1330 
last_return=-125.9 (+1 eps) [worker 2] episodes_seen=1340 last_return=-364.3 (+1 eps) [worker 3] episodes_seen=1360 last_return=-371.7 (+1 eps) [worker 1] episodes_seen=1360 last_return=-287.1 (+1 eps) [worker 0] episodes_seen=1340 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1350 last_return=-249.7 (+1 eps) [worker 3] episodes_seen=1370 last_return=-226.7 (+1 eps) [worker 1] episodes_seen=1370 last_return=-124.3 (+1 eps) [worker 0] episodes_seen=1350 last_return=-190.1 (+1 eps) [worker 3] episodes_seen=1380 last_return=-304.2 (+1 eps) [worker 2] episodes_seen=1360 last_return=-219.5 (+1 eps) [worker 1] episodes_seen=1380 last_return=-125.5 (+1 eps) [worker 0] episodes_seen=1360 last_return=-213.0 (+1 eps) [worker 3] episodes_seen=1390 last_return=-147.5 (+1 eps) [worker 2] episodes_seen=1370 last_return=-289.7 (+1 eps) [worker 1] episodes_seen=1390 last_return=-267.1 (+1 eps) [worker 0] episodes_seen=1370 last_return=-261.6 (+1 eps) [A2C][sync] it= 4009 steps= 481080 (+120) avg10=-288.28 loss=244.559 pg=-0.000 vf=444.653 H=0.010 gn=3421.200 [worker 3] episodes_seen=1400 last_return=-221.6 (+1 eps) [worker 2] episodes_seen=1380 last_return=-435.3 (+1 eps) [worker 1] episodes_seen=1400 last_return=-397.5 (+1 eps) [worker 0] episodes_seen=1380 last_return=-162.1 (+1 eps) [worker 3] episodes_seen=1410 last_return=-354.7 (+1 eps) [worker 2] episodes_seen=1390 last_return=-103.8 (+1 eps) [worker 1] episodes_seen=1410 last_return=-105.4 (+1 eps) [worker 0] episodes_seen=1390 last_return=-449.0 (+1 eps) [worker 3] episodes_seen=1420 last_return=-234.7 (+1 eps) [worker 2] episodes_seen=1400 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1420 last_return=-77.4 (+1 eps) [worker 0] episodes_seen=1400 last_return=-302.6 (+1 eps) [worker 3] episodes_seen=1430 last_return=-258.1 (+1 eps) [worker 2] episodes_seen=1410 last_return=-204.3 (+1 eps) [worker 1] episodes_seen=1430 last_return=-302.2 (+1 eps) [worker 0] episodes_seen=1410 last_return=-358.1 (+1 eps) 
[worker 3] episodes_seen=1440 last_return=-346.9 (+1 eps) [worker 2] episodes_seen=1420 last_return=-303.1 (+1 eps) [worker 1] episodes_seen=1440 last_return=-190.0 (+1 eps) [worker 0] episodes_seen=1420 last_return=-385.5 (+1 eps) [worker 3] episodes_seen=1450 last_return=-103.3 (+1 eps) [worker 2] episodes_seen=1430 last_return=-5.0 (+1 eps) [worker 1] episodes_seen=1450 last_return=-268.8 (+1 eps) [worker 0] episodes_seen=1430 last_return=-223.9 (+1 eps) [worker 3] episodes_seen=1460 last_return=-110.9 (+1 eps) [worker 2] episodes_seen=1440 last_return=-329.3 (+1 eps) [worker 1] episodes_seen=1460 last_return=-430.3 (+1 eps) [worker 0] episodes_seen=1440 last_return=-228.8 (+1 eps) [worker 2] episodes_seen=1450 last_return=-311.4 (+1 eps) [worker 3] episodes_seen=1470 last_return=-235.1 (+1 eps) [worker 0] episodes_seen=1450 last_return=-254.4 (+1 eps) [worker 1] episodes_seen=1470 last_return=-263.3 (+1 eps) [worker 3] episodes_seen=1480 last_return=-123.0 (+1 eps) [worker 2] episodes_seen=1460 last_return=-230.9 (+1 eps) [worker 0] episodes_seen=1460 last_return=-430.4 (+1 eps) [worker 1] episodes_seen=1480 last_return=-364.1 (+1 eps) [worker 3] episodes_seen=1490 last_return=-260.6 (+1 eps) [worker 2] episodes_seen=1470 last_return=-362.7 (+1 eps) [worker 0] episodes_seen=1470 last_return=-260.2 (+1 eps) [worker 1] episodes_seen=1490 last_return=-483.9 (+1 eps) [worker 2] episodes_seen=1480 last_return=-367.2 (+1 eps) [worker 3] episodes_seen=1500 last_return=-207.2 (+1 eps) [worker 0] episodes_seen=1480 last_return=-286.0 (+1 eps) [A2C][sync] it= 4343 steps= 521160 (+120) avg10=-227.96 loss=3613.908 pg=0.005 vf=6570.736 H=0.084 gn=19650.525 [worker 2] episodes_seen=1490 last_return=-153.2 (+1 eps) [worker 1] episodes_seen=1500 last_return=-221.4 (+1 eps) [worker 3] episodes_seen=1510 last_return=-178.2 (+1 eps) [worker 0] episodes_seen=1490 last_return=-245.1 (+1 eps) [worker 1] episodes_seen=1510 last_return=-223.2 (+1 eps) [worker 2] episodes_seen=1500 
last_return=-360.4 (+1 eps) [worker 3] episodes_seen=1520 last_return=-325.5 (+1 eps) [worker 0] episodes_seen=1500 last_return=-229.3 (+1 eps) [worker 1] episodes_seen=1520 last_return=-305.4 (+1 eps) [worker 2] episodes_seen=1510 last_return=-232.5 (+1 eps) [worker 3] episodes_seen=1530 last_return=-116.6 (+1 eps) [worker 0] episodes_seen=1510 last_return=-133.5 (+1 eps) [worker 1] episodes_seen=1530 last_return=-110.5 (+1 eps) [worker 2] episodes_seen=1520 last_return=-314.6 (+1 eps) [worker 3] episodes_seen=1540 last_return=-133.4 (+1 eps) [worker 0] episodes_seen=1520 last_return=-252.4 (+1 eps) [worker 1] episodes_seen=1540 last_return=-249.1 (+1 eps) [worker 3] episodes_seen=1550 last_return=-193.6 (+1 eps) [worker 2] episodes_seen=1530 last_return=-385.1 (+1 eps) [worker 0] episodes_seen=1530 last_return=-180.0 (+1 eps) [worker 1] episodes_seen=1550 last_return=-210.3 (+1 eps) [worker 3] episodes_seen=1560 last_return=-257.7 (+1 eps) [worker 2] episodes_seen=1540 last_return=-529.0 (+1 eps) [worker 0] episodes_seen=1540 last_return=-215.0 (+1 eps) [worker 1] episodes_seen=1560 last_return=-338.3 (+1 eps) [worker 3] episodes_seen=1570 last_return=-140.8 (+1 eps) [worker 2] episodes_seen=1550 last_return=-184.7 (+1 eps) [worker 0] episodes_seen=1550 last_return=-79.0 (+1 eps) [worker 1] episodes_seen=1570 last_return=-170.2 (+1 eps) [worker 3] episodes_seen=1580 last_return=-435.9 (+1 eps) [worker 2] episodes_seen=1560 last_return=-181.8 (+1 eps) [worker 0] episodes_seen=1560 last_return=-318.9 (+1 eps) [worker 1] episodes_seen=1580 last_return=-131.2 (+1 eps) [worker 3] episodes_seen=1590 last_return=-131.8 (+1 eps) [worker 2] episodes_seen=1570 last_return=-60.5 (+1 eps) [worker 0] episodes_seen=1570 last_return=-123.0 (+1 eps) [worker 1] episodes_seen=1590 last_return=-121.1 (+1 eps) [worker 3] episodes_seen=1600 last_return=-52.6 (+1 eps) [worker 2] episodes_seen=1580 last_return=-137.3 (+1 eps) [worker 0] episodes_seen=1580 last_return=-94.1 (+1 eps) 
[worker 3] episodes_seen=1610 last_return=-178.6 (+1 eps) [worker 2] episodes_seen=1590 last_return=-99.5 (+1 eps) [worker 0] episodes_seen=1590 last_return=-74.2 (+1 eps) [A2C][sync] it= 4677 steps= 561240 (+120) avg10=-147.23 loss=3053.318 pg=-0.163 vf=5551.812 H=1.041 gn=2970.341 [worker 1] episodes_seen=1600 last_return=-193.4 (+1 eps) [worker 3] episodes_seen=1620 last_return=-99.5 (+1 eps) [worker 2] episodes_seen=1600 last_return=-97.9 (+1 eps) [worker 0] episodes_seen=1600 last_return=-95.0 (+1 eps) [worker 1] episodes_seen=1610 last_return=-90.9 (+1 eps) [worker 3] episodes_seen=1630 last_return=34.5 (+1 eps) [worker 2] episodes_seen=1610 last_return=-99.9 (+1 eps) [worker 0] episodes_seen=1610 last_return=-108.8 (+1 eps) [worker 1] episodes_seen=1620 last_return=-100.3 (+1 eps) [worker 3] episodes_seen=1640 last_return=-57.5 (+1 eps) [worker 2] episodes_seen=1620 last_return=-118.4 (+1 eps) [worker 0] episodes_seen=1620 last_return=-100.8 (+1 eps) [worker 1] episodes_seen=1630 last_return=-67.9 (+1 eps) [worker 3] episodes_seen=1650 last_return=-70.5 (+1 eps) [worker 2] episodes_seen=1630 last_return=-62.3 (+1 eps) [worker 0] episodes_seen=1630 last_return=-100.2 (+1 eps) [worker 1] episodes_seen=1640 last_return=-92.9 (+1 eps) [worker 3] episodes_seen=1660 last_return=-101.9 (+1 eps) [worker 2] episodes_seen=1640 last_return=-95.7 (+1 eps) [worker 0] episodes_seen=1640 last_return=-105.0 (+1 eps) [worker 1] episodes_seen=1650 last_return=-0.2 (+1 eps) [worker 3] episodes_seen=1670 last_return=-110.6 (+1 eps) [worker 2] episodes_seen=1650 last_return=-63.4 (+1 eps) [worker 0] episodes_seen=1650 last_return=-71.5 (+1 eps) [worker 1] episodes_seen=1660 last_return=-236.7 (+1 eps) [worker 3] episodes_seen=1680 last_return=-108.4 (+1 eps) [worker 2] episodes_seen=1660 last_return=-100.7 (+1 eps) [worker 0] episodes_seen=1660 last_return=-88.0 (+1 eps) [worker 1] episodes_seen=1670 last_return=-97.2 (+1 eps) [worker 3] episodes_seen=1690 last_return=-86.5 (+1 
eps) [worker 2] episodes_seen=1670 last_return=-97.5 (+1 eps) [worker 0] episodes_seen=1670 last_return=-158.5 (+1 eps) [worker 3] episodes_seen=1700 last_return=-217.9 (+1 eps) [worker 1] episodes_seen=1680 last_return=-104.9 (+1 eps) [worker 2] episodes_seen=1680 last_return=-277.6 (+1 eps) [worker 0] episodes_seen=1680 last_return=-215.6 (+1 eps) [worker 3] episodes_seen=1710 last_return=-385.8 (+1 eps) [worker 1] episodes_seen=1690 last_return=-34.9 (+1 eps) [worker 2] episodes_seen=1690 last_return=-344.2 (+1 eps) [worker 0] episodes_seen=1690 last_return=-455.8 (+1 eps) [worker 1] episodes_seen=1700 last_return=-257.5 (+1 eps) [worker 3] episodes_seen=1720 last_return=-231.3 (+1 eps) [worker 2] episodes_seen=1700 last_return=-245.5 (+1 eps) [worker 0] episodes_seen=1700 last_return=-231.7 (+1 eps) [worker 3] episodes_seen=1730 last_return=-255.5 (+1 eps) [worker 1] episodes_seen=1710 last_return=-310.1 (+1 eps) [worker 2] episodes_seen=1710 last_return=-460.8 (+1 eps) [worker 0] episodes_seen=1710 last_return=-157.3 (+1 eps) [A2C][sync] it= 5011 steps= 601320 (+120) avg10= -96.56 loss=1396.889 pg=-0.122 vf=2540.045 H=0.919 gn=4993.050 [worker 3] episodes_seen=1740 last_return=-91.2 (+1 eps) [worker 1] episodes_seen=1720 last_return=-64.2 (+1 eps) [worker 2] episodes_seen=1720 last_return=-88.9 (+1 eps) [worker 0] episodes_seen=1720 last_return=-66.9 (+1 eps) [worker 3] episodes_seen=1750 last_return=-87.4 (+1 eps) [worker 1] episodes_seen=1730 last_return=-108.5 (+1 eps) [worker 2] episodes_seen=1730 last_return=-47.7 (+1 eps) [worker 0] episodes_seen=1730 last_return=-87.6 (+1 eps) [worker 3] episodes_seen=1760 last_return=-150.6 (+1 eps) [worker 1] episodes_seen=1740 last_return=-44.3 (+1 eps) [worker 2] episodes_seen=1740 last_return=-99.5 (+1 eps) [worker 0] episodes_seen=1740 last_return=-72.9 (+1 eps) [worker 3] episodes_seen=1770 last_return=-148.1 (+1 eps) [worker 1] episodes_seen=1750 last_return=-83.0 (+1 eps) [worker 2] episodes_seen=1750 
last_return=-167.4 (+1 eps) [worker 0] episodes_seen=1750 last_return=-96.4 (+1 eps) [worker 1] episodes_seen=1760 last_return=-113.2 (+1 eps) [worker 2] episodes_seen=1760 last_return=-86.7 (+1 eps) [worker 3] episodes_seen=1780 last_return=-76.1 (+1 eps) [worker 0] episodes_seen=1760 last_return=-88.0 (+1 eps) [worker 1] episodes_seen=1770 last_return=-88.2 (+1 eps) [worker 2] episodes_seen=1770 last_return=-172.8 (+1 eps) [worker 3] episodes_seen=1790 last_return=-149.8 (+1 eps) [worker 0] episodes_seen=1770 last_return=-143.9 (+1 eps) [worker 1] episodes_seen=1780 last_return=-87.2 (+1 eps) [worker 3] episodes_seen=1800 last_return=-87.3 (+1 eps) [worker 2] episodes_seen=1780 last_return=-92.0 (+1 eps) [worker 0] episodes_seen=1780 last_return=-54.9 (+1 eps) [worker 1] episodes_seen=1790 last_return=-47.5 (+1 eps) [worker 3] episodes_seen=1810 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1790 last_return=-67.8 (+1 eps) [worker 0] episodes_seen=1790 last_return=-89.3 (+1 eps) [worker 3] episodes_seen=1820 last_return=-70.1 (+1 eps) [worker 1] episodes_seen=1800 last_return=-90.5 (+1 eps) [worker 2] episodes_seen=1800 last_return=-78.7 (+1 eps) [worker 0] episodes_seen=1800 last_return=-109.2 (+1 eps) [worker 3] episodes_seen=1830 last_return=-86.5 (+1 eps) [worker 1] episodes_seen=1810 last_return=-59.1 (+1 eps) [worker 2] episodes_seen=1810 last_return=-162.5 (+1 eps) [worker 0] episodes_seen=1810 last_return=-131.7 (+1 eps) [A2C][sync] it= 5345 steps= 641400 (+120) avg10=-101.83 loss=195.429 pg=-0.151 vf=355.626 H=0.948 gn=1136.656 [worker 3] episodes_seen=1840 last_return=-95.9 (+1 eps) [worker 1] episodes_seen=1820 last_return=-85.7 (+1 eps) [worker 2] episodes_seen=1820 last_return=-105.5 (+1 eps) [worker 0] episodes_seen=1820 last_return=-18.8 (+1 eps) [worker 1] episodes_seen=1830 last_return=-104.5 (+1 eps) [worker 3] episodes_seen=1850 last_return=-158.9 (+1 eps) [worker 2] episodes_seen=1830 last_return=-124.0 (+1 eps) [worker 0] 
episodes_seen=1830 last_return=-208.2 (+1 eps) [worker 3] episodes_seen=1860 last_return=-146.2 (+1 eps) [worker 1] episodes_seen=1840 last_return=-189.1 (+1 eps) [worker 2] episodes_seen=1840 last_return=-171.3 (+1 eps) [worker 0] episodes_seen=1840 last_return=-166.0 (+1 eps) [worker 3] episodes_seen=1870 last_return=-438.8 (+1 eps) [worker 1] episodes_seen=1850 last_return=-231.2 (+1 eps) [worker 2] episodes_seen=1850 last_return=-593.0 (+1 eps) [worker 0] episodes_seen=1850 last_return=-300.1 (+1 eps) [worker 3] episodes_seen=1880 last_return=-256.5 (+1 eps) [worker 1] episodes_seen=1860 last_return=-216.9 (+1 eps) [worker 2] episodes_seen=1860 last_return=-135.2 (+1 eps) [worker 0] episodes_seen=1860 last_return=-420.2 (+1 eps) [worker 3] episodes_seen=1890 last_return=-205.8 (+1 eps) [worker 1] episodes_seen=1870 last_return=-190.7 (+1 eps) [worker 2] episodes_seen=1870 last_return=-138.3 (+1 eps) [worker 0] episodes_seen=1870 last_return=-236.1 (+1 eps) [worker 3] episodes_seen=1900 last_return=-293.3 (+1 eps) [worker 1] episodes_seen=1880 last_return=-420.6 (+1 eps) [worker 2] episodes_seen=1880 last_return=-137.5 (+1 eps) [worker 0] episodes_seen=1880 last_return=-166.3 (+1 eps) [worker 3] episodes_seen=1910 last_return=-102.9 (+1 eps) [worker 1] episodes_seen=1890 last_return=-69.4 (+1 eps) [worker 2] episodes_seen=1890 last_return=-104.9 (+1 eps) [worker 0] episodes_seen=1890 last_return=-156.2 (+1 eps) [worker 3] episodes_seen=1920 last_return=-71.6 (+1 eps) [worker 1] episodes_seen=1900 last_return=-98.8 (+1 eps) [worker 2] episodes_seen=1900 last_return=-46.1 (+1 eps) [worker 0] episodes_seen=1900 last_return=-70.7 (+1 eps) [worker 3] episodes_seen=1930 last_return=-216.6 (+1 eps) [worker 2] episodes_seen=1910 last_return=-43.3 (+1 eps) [worker 1] episodes_seen=1910 last_return=-121.8 (+1 eps) [worker 3] episodes_seen=1940 last_return=75.4 (+1 eps) [worker 0] episodes_seen=1910 last_return=-125.6 (+1 eps) [worker 2] episodes_seen=1920 
last_return=100.1 (+1 eps) [worker 1] episodes_seen=1920 last_return=-116.7 (+1 eps) [A2C][sync] it= 5679 steps= 681480 (+120) avg10=-114.70 loss=66.764 pg=-0.021 vf=121.461 H=1.216 gn=227.126 [worker 0] episodes_seen=1920 last_return=-90.6 (+1 eps) [worker 3] episodes_seen=1950 last_return=-69.2 (+1 eps) [worker 2] episodes_seen=1930 last_return=-82.6 (+1 eps) [worker 1] episodes_seen=1930 last_return=-92.8 (+1 eps) [worker 0] episodes_seen=1930 last_return=-135.3 (+1 eps) [worker 3] episodes_seen=1960 last_return=-139.1 (+1 eps) [worker 1] episodes_seen=1940 last_return=-175.6 (+1 eps) [worker 2] episodes_seen=1940 last_return=-72.0 (+1 eps) [worker 0] episodes_seen=1940 last_return=-112.2 (+1 eps) [worker 3] episodes_seen=1970 last_return=-34.5 (+1 eps) [worker 2] episodes_seen=1950 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1950 last_return=-53.7 (+1 eps) [worker 0] episodes_seen=1950 last_return=-136.7 (+1 eps) [worker 2] episodes_seen=1960 last_return=-124.3 (+1 eps) [worker 3] episodes_seen=1980 last_return=-57.9 (+1 eps) [worker 1] episodes_seen=1960 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1960 last_return=-47.4 (+1 eps) [worker 2] episodes_seen=1970 last_return=-117.0 (+1 eps) [worker 3] episodes_seen=1990 last_return=-43.0 (+1 eps) [worker 1] episodes_seen=1970 last_return=-120.9 (+1 eps) [worker 2] episodes_seen=1980 last_return=-86.2 (+1 eps) [worker 0] episodes_seen=1970 last_return=-98.2 (+1 eps) [worker 3] episodes_seen=2000 last_return=-66.0 (+1 eps) [worker 1] episodes_seen=1980 last_return=-228.3 (+1 eps) [worker 0] episodes_seen=1980 last_return=-160.3 (+1 eps) [worker 2] episodes_seen=1990 last_return=-63.5 (+1 eps) [worker 3] episodes_seen=2010 last_return=-72.4 (+1 eps) [worker 1] episodes_seen=1990 last_return=-166.4 (+1 eps) [worker 2] episodes_seen=2000 last_return=-98.3 (+1 eps) [worker 0] episodes_seen=1990 last_return=-66.2 (+1 eps) [worker 3] episodes_seen=2020 last_return=-104.9 (+1 eps) [worker 2] 
episodes_seen=2010 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2000 last_return=-173.0 (+1 eps) [worker 0] episodes_seen=2000 last_return=-112.6 (+1 eps) [worker 3] episodes_seen=2030 last_return=-87.4 (+1 eps) [worker 2] episodes_seen=2020 last_return=-112.0 (+1 eps) [worker 1] episodes_seen=2010 last_return=-143.6 (+1 eps) [A2C][sync] it= 6013 steps= 721560 (+120) avg10=-106.40 loss=29.208 pg=-0.058 vf=53.242 H=1.087 gn=228.646 [worker 0] episodes_seen=2010 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=2040 last_return=-65.4 (+1 eps) [worker 2] episodes_seen=2030 last_return=-85.8 (+1 eps) [worker 1] episodes_seen=2020 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=2050 last_return=-112.7 (+1 eps) [worker 0] episodes_seen=2020 last_return=-219.6 (+1 eps) [worker 2] episodes_seen=2040 last_return=-122.8 (+1 eps) [worker 1] episodes_seen=2030 last_return=-287.2 (+1 eps) [worker 3] episodes_seen=2060 last_return=-123.2 (+1 eps) [worker 0] episodes_seen=2030 last_return=-301.9 (+1 eps) [worker 2] episodes_seen=2050 last_return=-277.6 (+1 eps) [worker 1] episodes_seen=2040 last_return=-181.1 (+1 eps) [worker 3] episodes_seen=2070 last_return=-246.0 (+1 eps) [worker 0] episodes_seen=2040 last_return=-106.4 (+1 eps) [worker 2] episodes_seen=2060 last_return=-291.1 (+1 eps) [worker 1] episodes_seen=2050 last_return=-74.6 (+1 eps) [worker 3] episodes_seen=2080 last_return=-83.2 (+1 eps) [worker 0] episodes_seen=2050 last_return=-183.9 (+1 eps) [worker 2] episodes_seen=2070 last_return=-83.3 (+1 eps) [worker 1] episodes_seen=2060 last_return=-33.9 (+1 eps) [worker 3] episodes_seen=2090 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2060 last_return=-102.8 (+1 eps) [worker 2] episodes_seen=2080 last_return=-135.4 (+1 eps) [worker 3] episodes_seen=2100 last_return=-165.4 (+1 eps) [worker 1] episodes_seen=2070 last_return=-155.3 (+1 eps) [worker 0] episodes_seen=2070 last_return=-129.6 (+1 eps) [worker 2] episodes_seen=2090 last_return=-92.0 (+1 
eps) [worker 1] episodes_seen=2080 last_return=-140.2 (+1 eps) [worker 3] episodes_seen=2110 last_return=-102.7 (+1 eps) [worker 0] episodes_seen=2080 last_return=-129.5 (+1 eps) [worker 2] episodes_seen=2100 last_return=-72.5 (+1 eps) [worker 3] episodes_seen=2120 last_return=-99.2 (+1 eps) [worker 1] episodes_seen=2090 last_return=-127.0 (+1 eps) [worker 0] episodes_seen=2090 last_return=-86.7 (+1 eps) [worker 2] episodes_seen=2110 last_return=-233.0 (+1 eps) [worker 3] episodes_seen=2130 last_return=-96.9 (+1 eps) [worker 1] episodes_seen=2100 last_return=-119.1 (+1 eps) [worker 0] episodes_seen=2100 last_return=-102.7 (+1 eps) [worker 2] episodes_seen=2120 last_return=-40.2 (+1 eps) [worker 1] episodes_seen=2110 last_return=-91.4 (+1 eps) [A2C][sync] it= 6347 steps= 761640 (+120) avg10= -85.55 loss=93.759 pg=-0.220 vf=170.897 H=0.961 gn=447.225 [worker 3] episodes_seen=2140 last_return=-88.6 (+1 eps) [worker 0] episodes_seen=2110 last_return=-89.9 (+1 eps) [worker 2] episodes_seen=2130 last_return=-200.3 (+1 eps) [worker 1] episodes_seen=2120 last_return=-192.5 (+1 eps) [worker 3] episodes_seen=2150 last_return=-170.0 (+1 eps) [worker 0] episodes_seen=2120 last_return=-56.5 (+1 eps) [worker 1] episodes_seen=2130 last_return=-94.6 (+1 eps) [worker 2] episodes_seen=2140 last_return=-92.4 (+1 eps) [worker 3] episodes_seen=2160 last_return=-75.2 (+1 eps) [worker 0] episodes_seen=2130 last_return=-86.0 (+1 eps) [worker 1] episodes_seen=2140 last_return=-96.9 (+1 eps) [worker 2] episodes_seen=2150 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=2170 last_return=-73.1 (+1 eps) [worker 0] episodes_seen=2140 last_return=-64.6 (+1 eps) [worker 2] episodes_seen=2160 last_return=-62.0 (+1 eps) [worker 1] episodes_seen=2150 last_return=-69.7 (+1 eps) [worker 3] episodes_seen=2180 last_return=-108.7 (+1 eps) [worker 0] episodes_seen=2150 last_return=-91.6 (+1 eps) [worker 2] episodes_seen=2170 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2160 last_return=-158.9 
(+1 eps) [worker 3] episodes_seen=2190 last_return=-399.1 (+1 eps) [worker 0] episodes_seen=2160 last_return=-180.1 (+1 eps) [worker 2] episodes_seen=2180 last_return=-46.8 (+1 eps) [worker 1] episodes_seen=2170 last_return=-92.1 (+1 eps) [worker 3] episodes_seen=2200 last_return=-75.8 (+1 eps) [worker 0] episodes_seen=2170 last_return=-96.6 (+1 eps) [worker 2] episodes_seen=2190 last_return=-111.3 (+1 eps) [worker 1] episodes_seen=2180 last_return=-16.3 (+1 eps) [worker 0] episodes_seen=2180 last_return=-136.5 (+1 eps) [worker 3] episodes_seen=2210 last_return=-127.0 (+1 eps) [A2C][sync] it= 6681 steps= 801720 (+120) avg10=-169.78 loss=2659.721 pg=-0.024 vf=4835.923 H=0.844 gn=8886.581 [worker 2] episodes_seen=2200 last_return=-130.1 (+1 eps) [worker 1] episodes_seen=2190 last_return=-121.5 (+1 eps) [worker 0] episodes_seen=2190 last_return=-82.1 (+1 eps) [worker 3] episodes_seen=2220 last_return=-53.9 (+1 eps) [worker 2] episodes_seen=2210 last_return=-105.5 (+1 eps) [worker 1] episodes_seen=2200 last_return=-46.5 (+1 eps) [worker 0] episodes_seen=2200 last_return=-61.5 (+1 eps) [worker 3] episodes_seen=2230 last_return=-83.1 (+1 eps) [worker 2] episodes_seen=2220 last_return=-41.4 (+1 eps) [worker 1] episodes_seen=2210 last_return=-81.8 (+1 eps) [worker 3] episodes_seen=2240 last_return=-55.9 (+1 eps) [worker 1] episodes_seen=2220 last_return=-75.0 (+1 eps) [worker 2] episodes_seen=2230 last_return=-66.0 (+1 eps) [worker 0] episodes_seen=2210 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=2250 last_return=-40.2 (+1 eps) [worker 0] episodes_seen=2220 last_return=-56.1 (+1 eps) [worker 1] episodes_seen=2230 last_return=-46.9 (+1 eps) [worker 2] episodes_seen=2240 last_return=-102.1 (+1 eps) [worker 1] episodes_seen=2240 last_return=-81.7 (+1 eps) [worker 0] episodes_seen=2230 last_return=-102.7 (+1 eps) [worker 3] episodes_seen=2260 last_return=-76.7 (+1 eps) [worker 2] episodes_seen=2250 last_return=-63.8 (+1 eps) [worker 1] episodes_seen=2250 
last_return=-60.9 (+1 eps) [worker 3] episodes_seen=2270 last_return=-73.7 (+1 eps) [worker 2] episodes_seen=2260 last_return=-207.0 (+1 eps) [worker 1] episodes_seen=2260 last_return=-30.7 (+1 eps) [worker 0] episodes_seen=2240 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=2280 last_return=-51.2 (+1 eps) [worker 2] episodes_seen=2270 last_return=-52.3 (+1 eps) [worker 1] episodes_seen=2270 last_return=-121.4 (+1 eps) [worker 0] episodes_seen=2250 last_return=-77.6 (+1 eps) [worker 3] episodes_seen=2290 last_return=-80.2 (+1 eps) [A2C][sync] it= 7015 steps= 841800 (+120) avg10= -60.35 loss=103.384 pg=-0.053 vf=188.096 H=1.024 gn=440.291 [worker 2] episodes_seen=2280 last_return=-91.6 (+1 eps) [worker 1] episodes_seen=2280 last_return=-202.2 (+1 eps) [worker 3] episodes_seen=2300 last_return=-25.1 (+1 eps) [worker 0] episodes_seen=2260 last_return=-62.7 (+1 eps) [worker 2] episodes_seen=2290 last_return=-263.7 (+1 eps) [worker 0] episodes_seen=2270 last_return=-35.7 (+1 eps) [worker 3] episodes_seen=2310 last_return=-127.5 (+1 eps) [worker 2] episodes_seen=2300 last_return=-60.9 (+1 eps) [worker 1] episodes_seen=2290 last_return=-34.7 (+1 eps) [worker 3] episodes_seen=2320 last_return=-74.5 (+1 eps) [worker 0] episodes_seen=2280 last_return=-184.2 (+1 eps) [worker 1] episodes_seen=2300 last_return=-69.3 (+1 eps) [worker 2] episodes_seen=2310 last_return=-74.3 (+1 eps) [worker 0] episodes_seen=2290 last_return=-72.6 (+1 eps) [worker 3] episodes_seen=2330 last_return=-120.8 (+1 eps) [worker 1] episodes_seen=2310 last_return=-90.7 (+1 eps) [worker 0] episodes_seen=2300 last_return=-35.9 (+1 eps) [worker 2] episodes_seen=2320 last_return=-133.7 (+1 eps) [worker 3] episodes_seen=2340 last_return=-91.5 (+1 eps) [worker 1] episodes_seen=2320 last_return=-140.1 (+1 eps) [worker 2] episodes_seen=2330 last_return=-53.9 (+1 eps) [worker 0] episodes_seen=2310 last_return=-32.3 (+1 eps) [worker 1] episodes_seen=2330 last_return=-59.9 (+1 eps) [worker 0] episodes_seen=2320 
last_return=-78.0 (+1 eps) [worker 3] episodes_seen=2350 last_return=-83.6 (+1 eps) [worker 2] episodes_seen=2340 last_return=-177.6 (+1 eps) [worker 1] episodes_seen=2340 last_return=-162.0 (+1 eps) [worker 0] episodes_seen=2330 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=2360 last_return=-230.2 (+1 eps) [A2C][sync] it= 7349 steps= 881880 (+120) avg10=-171.55 loss=1852.893 pg=-0.049 vf=3369.017 H=1.147 gn=6397.842 [worker 2] episodes_seen=2350 last_return=-185.1 (+1 eps) [worker 1] episodes_seen=2350 last_return=-39.4 (+1 eps) [worker 0] episodes_seen=2340 last_return=-107.2 (+1 eps) [worker 3] episodes_seen=2370 last_return=-49.0 (+1 eps) [worker 2] episodes_seen=2360 last_return=-154.8 (+1 eps) [worker 0] episodes_seen=2350 last_return=-195.4 (+1 eps) [worker 3] episodes_seen=2380 last_return=-135.0 (+1 eps) [worker 1] episodes_seen=2360 last_return=-281.3 (+1 eps) [worker 2] episodes_seen=2370 last_return=-289.3 (+1 eps) [worker 0] episodes_seen=2360 last_return=-176.5 (+1 eps) [worker 1] episodes_seen=2370 last_return=-137.0 (+1 eps) [worker 3] episodes_seen=2390 last_return=-214.7 (+1 eps) [A2C][sync] it= 7500 steps= 900000 (+120) avg10=-187.01 loss=5663.394 pg=-0.048 vf=10297.175 H=0.253 gn=9359.326 [Checkpoint] Saved self-describing checkpoint → part2_artifacts/checkpoints/a2c_run7_seed1227.pth [A2C][sync] done: steps=900000 time=815.4s avg10=-187.01
[Run run7_seed1227] checkpoint: part2_artifacts/checkpoints/a2c_run7_seed1227.pth [Run run7_seed1227] training plot (tail 500): part2_artifacts/train_curve_run7_seed1227.png [Run run7_seed1227] training plot (full): part2_artifacts/train_curve_full_run7_seed1227.png [Run run7_seed1227] per-worker plot: part2_artifacts/train_curve_workers_run7_seed1227.png [Run run7_seed1227] workers-average plot: part2_artifacts/train_curve_workers_avg_run7_seed1227.png
[Eval run7_seed1227] mean=-693.82 std=113.17 min=-927.28 max=-562.23 [Eval run7_seed1227] CSV: part2_artifacts/eval10_run7_seed1227.csv [Eval run7_seed1227] plot: part2_artifacts/eval10_run7_seed1227.png [Best] ep=6 return=-562.23 seed=1233
/usr/local/lib/python3.12/dist-packages/gymnasium/wrappers/rendering.py:293: UserWarning: WARN: Overwriting existing videos at /content/part2_artifacts/videos/run7_seed1227 folder (try specifying a different `video_folder` for the `RecordVideo` wrapper if this is not desired)
logger.warn(
[Video run7_seed1227] episode return=-562.23 [Video run7_seed1227] saved under: part2_artifacts/videos run7_seed1227 | mean=-693.8±113.2 | best_ep=6, best_ret=-562.2
Run#8
# ---- Run 8: train -> evaluate -> record, all keyed by the same run_id ----
run_id = f"run8_seed{SEED}"

# Stage 1: multi-worker training; returns the model, the training logs and
# a paths object (its .ckpt_path is consumed by the two stages below).
model, logs, paths = train_once(
    run_id=run_id,
    # 4 workers x T=30 -> 120 env steps collected per synchronous update.
    n_workers=4, total_env_steps=1_200_000, T=30,
    gamma=0.99,
    # Loss weighting (presumably entropy bonus / value-loss scale -- confirm
    # against train_once).
    entropy_coef=0.01, value_coef=0.55,
    max_grad_norm=0.5, lr=2.5e-4,
    # A progress line is emitted roughly every 50k env steps.
    log_every=50_000,
)

# Stage 2: fixed-seed greedy evaluation over 10 episodes, loaded from the
# checkpoint written by stage 1.
metrics, eval_paths = evaluate_10(run_id, paths.ckpt_path)

# Stage 3: re-run the best of those evaluation episodes (by its seed) and
# record it as a video.
video_dir, best_idx, best_ret = record_best_from_eval(run_id, paths.ckpt_path)

# One-line summary of the run: eval mean±std plus the best episode.
print(
    f"{run_id} | mean={metrics['mean']:.1f}±{metrics['std']:.1f}"
    f" | best_ep={best_idx}, best_ret={best_ret:.1f}"
)
[Run run8_seed1227] starting training… [A2C][sync] start: workers=4, T=30, target_steps=1200000, mp=fork [A2C][sync] it= 1 steps= 120 (+120) avg10= nan loss=164.375 pg=0.000 vf=298.888 H=1.386 gn=34.322 [worker 0] episodes_seen=10 last_return=-62.2 (+1 eps) [worker 2] episodes_seen=10 last_return=-97.0 (+1 eps) [worker 3] episodes_seen=10 last_return=-88.0 (+1 eps) [worker 0] episodes_seen=20 last_return=-75.7 (+1 eps) [worker 1] episodes_seen=10 last_return=-161.4 (+1 eps) [worker 3] episodes_seen=20 last_return=-113.3 (+1 eps) [worker 2] episodes_seen=20 last_return=-82.2 (+1 eps) [worker 1] episodes_seen=20 last_return=-306.4 (+1 eps) [worker 2] episodes_seen=30 last_return=-65.3 (+1 eps) [worker 0] episodes_seen=30 last_return=-253.8 (+1 eps) [worker 3] episodes_seen=30 last_return=-93.9 (+1 eps) [worker 1] episodes_seen=30 last_return=-346.9 (+1 eps) [worker 0] episodes_seen=40 last_return=-303.4 (+1 eps) [worker 2] episodes_seen=40 last_return=-195.5 (+1 eps) [worker 3] episodes_seen=40 last_return=-241.9 (+1 eps) [worker 1] episodes_seen=40 last_return=-232.1 (+1 eps) [worker 2] episodes_seen=50 last_return=-235.4 (+1 eps) [worker 3] episodes_seen=50 last_return=-63.6 (+1 eps) [worker 0] episodes_seen=50 last_return=-192.5 (+1 eps) [worker 1] episodes_seen=50 last_return=-347.5 (+1 eps) [worker 0] episodes_seen=60 last_return=-123.0 (+1 eps) [worker 2] episodes_seen=60 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=60 last_return=-238.4 (+1 eps) [worker 1] episodes_seen=60 last_return=-241.9 (+1 eps) [worker 0] episodes_seen=70 last_return=-4.7 (+1 eps) [worker 2] episodes_seen=70 last_return=-150.3 (+1 eps) [worker 3] episodes_seen=70 last_return=-344.4 (+1 eps) [worker 1] episodes_seen=70 last_return=-270.5 (+1 eps) [worker 0] episodes_seen=80 last_return=-312.3 (+1 eps) [worker 3] episodes_seen=80 last_return=-312.4 (+1 eps) [worker 2] episodes_seen=80 last_return=-419.0 (+1 eps) [worker 1] episodes_seen=80 last_return=-541.2 (+1 eps) [worker 0] 
episodes_seen=90 last_return=-233.4 (+1 eps) [worker 3] episodes_seen=90 last_return=-196.7 (+1 eps) [worker 2] episodes_seen=90 last_return=-262.5 (+1 eps) [worker 1] episodes_seen=90 last_return=-326.4 (+1 eps) [worker 0] episodes_seen=100 last_return=-226.7 (+1 eps) [worker 2] episodes_seen=100 last_return=-274.4 (+1 eps) [worker 3] episodes_seen=100 last_return=-146.0 (+1 eps) [worker 1] episodes_seen=100 last_return=-184.2 (+1 eps) [worker 0] episodes_seen=110 last_return=-249.1 (+1 eps) [worker 2] episodes_seen=110 last_return=-107.4 (+1 eps) [worker 3] episodes_seen=110 last_return=-372.4 (+1 eps) [worker 1] episodes_seen=110 last_return=-169.1 (+1 eps) [worker 0] episodes_seen=120 last_return=-353.0 (+1 eps) [worker 2] episodes_seen=120 last_return=-405.2 (+1 eps) [worker 3] episodes_seen=120 last_return=-424.1 (+1 eps) [worker 0] episodes_seen=130 last_return=-274.6 (+1 eps) [worker 1] episodes_seen=120 last_return=-281.6 (+1 eps) [worker 2] episodes_seen=130 last_return=-320.7 (+1 eps) [worker 3] episodes_seen=130 last_return=-112.5 (+1 eps) [worker 1] episodes_seen=130 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=140 last_return=-238.9 (+1 eps) [worker 3] episodes_seen=140 last_return=-185.3 (+1 eps) [worker 2] episodes_seen=140 last_return=-142.0 (+1 eps) [A2C][sync] it= 418 steps= 50160 (+120) avg10=-227.33 loss=2204.848 pg=-0.044 vf=4008.908 H=0.672 gn=3348.555 [worker 1] episodes_seen=140 last_return=-170.6 (+1 eps) [worker 0] episodes_seen=150 last_return=-245.8 (+1 eps) [worker 3] episodes_seen=150 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=150 last_return=-215.6 (+1 eps) [worker 1] episodes_seen=150 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=160 last_return=-216.2 (+1 eps) [worker 3] episodes_seen=160 last_return=-392.4 (+1 eps) [worker 2] episodes_seen=160 last_return=-229.1 (+1 eps) [worker 0] episodes_seen=170 last_return=-267.2 (+1 eps) [worker 1] episodes_seen=160 last_return=-202.5 (+1 eps) [worker 2] 
episodes_seen=170 last_return=-353.5 (+1 eps) [worker 3] episodes_seen=170 last_return=-171.4 (+1 eps) [worker 0] episodes_seen=180 last_return=-207.2 (+1 eps) [worker 1] episodes_seen=170 last_return=-277.1 (+1 eps) [worker 2] episodes_seen=180 last_return=-300.7 (+1 eps) [worker 3] episodes_seen=180 last_return=-287.0 (+1 eps) [worker 1] episodes_seen=180 last_return=-302.4 (+1 eps) [worker 0] episodes_seen=190 last_return=-129.0 (+1 eps) [worker 2] episodes_seen=190 last_return=-355.3 (+1 eps) [worker 3] episodes_seen=190 last_return=-508.9 (+1 eps) [worker 0] episodes_seen=200 last_return=-170.3 (+1 eps) [worker 1] episodes_seen=190 last_return=-194.5 (+1 eps) [worker 2] episodes_seen=200 last_return=-415.3 (+1 eps) [worker 1] episodes_seen=200 last_return=-175.3 (+1 eps) [worker 3] episodes_seen=200 last_return=-178.1 (+1 eps) [worker 0] episodes_seen=210 last_return=-235.6 (+1 eps) [worker 2] episodes_seen=210 last_return=-191.9 (+1 eps) [worker 1] episodes_seen=210 last_return=-148.5 (+1 eps) [worker 3] episodes_seen=210 last_return=-143.3 (+1 eps) [worker 2] episodes_seen=220 last_return=-237.5 (+1 eps) [worker 0] episodes_seen=220 last_return=-294.5 (+1 eps) [worker 1] episodes_seen=220 last_return=-137.1 (+1 eps) [worker 3] episodes_seen=220 last_return=-127.6 (+1 eps) [worker 2] episodes_seen=230 last_return=-156.3 (+1 eps) [worker 0] episodes_seen=230 last_return=-108.6 (+1 eps) [worker 3] episodes_seen=230 last_return=-202.9 (+1 eps) [worker 1] episodes_seen=230 last_return=-138.6 (+1 eps) [worker 0] episodes_seen=240 last_return=-229.0 (+1 eps) [worker 2] episodes_seen=240 last_return=-132.7 (+1 eps) [worker 3] episodes_seen=240 last_return=-323.5 (+1 eps) [worker 1] episodes_seen=240 last_return=-345.6 (+1 eps) [worker 2] episodes_seen=250 last_return=-308.5 (+1 eps) [worker 0] episodes_seen=250 last_return=-257.8 (+1 eps) [worker 3] episodes_seen=250 last_return=-161.7 (+1 eps) [A2C][sync] it= 835 steps= 100200 (+120) avg10=-202.48 loss=38986.523 
pg=-0.001 vf=70884.586 H=0.019 gn=46678.652 [worker 1] episodes_seen=250 last_return=-249.9 (+1 eps) [worker 0] episodes_seen=260 last_return=-190.5 (+1 eps) [worker 2] episodes_seen=260 last_return=-181.1 (+1 eps) [worker 3] episodes_seen=260 last_return=-154.4 (+1 eps) [worker 1] episodes_seen=260 last_return=-174.4 (+1 eps) [worker 2] episodes_seen=270 last_return=-329.5 (+1 eps) [worker 0] episodes_seen=270 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=270 last_return=-128.0 (+1 eps) [worker 1] episodes_seen=270 last_return=-229.4 (+1 eps) [worker 0] episodes_seen=280 last_return=-286.4 (+1 eps) [worker 2] episodes_seen=280 last_return=-241.2 (+1 eps) [worker 1] episodes_seen=280 last_return=-309.3 (+1 eps) [worker 3] episodes_seen=280 last_return=-267.2 (+1 eps) [worker 2] episodes_seen=290 last_return=-251.4 (+1 eps) [worker 1] episodes_seen=290 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=290 last_return=-124.5 (+1 eps) [worker 3] episodes_seen=290 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=300 last_return=-297.2 (+1 eps) [worker 0] episodes_seen=300 last_return=-321.5 (+1 eps) [worker 1] episodes_seen=300 last_return=-365.0 (+1 eps) [worker 3] episodes_seen=300 last_return=-349.5 (+1 eps) [worker 2] episodes_seen=310 last_return=-138.3 (+1 eps) [worker 0] episodes_seen=310 last_return=-124.3 (+1 eps) [worker 1] episodes_seen=310 last_return=-125.0 (+1 eps) [worker 3] episodes_seen=310 last_return=-182.0 (+1 eps) [worker 2] episodes_seen=320 last_return=-123.1 (+1 eps) [worker 0] episodes_seen=320 last_return=-378.8 (+1 eps) [worker 1] episodes_seen=320 last_return=-222.8 (+1 eps) [worker 3] episodes_seen=320 last_return=-142.6 (+1 eps) [worker 2] episodes_seen=330 last_return=-245.1 (+1 eps) [worker 0] episodes_seen=330 last_return=-170.7 (+1 eps) [worker 1] episodes_seen=330 last_return=-134.4 (+1 eps) [worker 3] episodes_seen=330 last_return=-241.7 (+1 eps) [worker 1] episodes_seen=340 last_return=-253.9 (+1 eps) [worker 0] 
episodes_seen=340 last_return=-111.2 (+1 eps) [worker 2] episodes_seen=340 last_return=-235.3 (+1 eps) [worker 3] episodes_seen=340 last_return=-214.5 (+1 eps) [worker 2] episodes_seen=350 last_return=-351.3 (+1 eps) [worker 0] episodes_seen=350 last_return=-181.3 (+1 eps) [worker 1] episodes_seen=350 last_return=-198.6 (+1 eps) [worker 3] episodes_seen=350 last_return=-168.3 (+1 eps) [A2C][sync] it= 1252 steps= 150240 (+120) avg10=-238.22 loss=6783.062 pg=0.001 vf=12332.838 H=0.006 gn=26482.910 [worker 1] episodes_seen=360 last_return=-344.9 (+1 eps) [worker 2] episodes_seen=360 last_return=-126.6 (+1 eps) [worker 0] episodes_seen=360 last_return=-311.7 (+1 eps) [worker 3] episodes_seen=360 last_return=-185.6 (+1 eps) [worker 0] episodes_seen=370 last_return=-250.6 (+1 eps) [worker 1] episodes_seen=370 last_return=-280.2 (+1 eps) [worker 2] episodes_seen=370 last_return=-220.2 (+1 eps) [worker 3] episodes_seen=370 last_return=-216.3 (+1 eps) [worker 1] episodes_seen=380 last_return=-255.3 (+1 eps) [worker 2] episodes_seen=380 last_return=-238.3 (+1 eps) [worker 0] episodes_seen=380 last_return=-180.5 (+1 eps) [worker 3] episodes_seen=380 last_return=-165.8 (+1 eps) [worker 1] episodes_seen=390 last_return=-313.4 (+1 eps) [worker 0] episodes_seen=390 last_return=-259.9 (+1 eps) [worker 3] episodes_seen=390 last_return=-243.6 (+1 eps) [worker 2] episodes_seen=390 last_return=-186.4 (+1 eps) [worker 1] episodes_seen=400 last_return=-279.0 (+1 eps) [worker 0] episodes_seen=400 last_return=-268.9 (+1 eps) [worker 3] episodes_seen=400 last_return=-246.2 (+1 eps) [worker 2] episodes_seen=400 last_return=-200.7 (+1 eps) [worker 0] episodes_seen=410 last_return=-185.7 (+1 eps) [worker 3] episodes_seen=410 last_return=-342.5 (+1 eps) [worker 1] episodes_seen=410 last_return=-228.9 (+1 eps) [worker 2] episodes_seen=410 last_return=-133.9 (+1 eps) [worker 1] episodes_seen=420 last_return=-146.2 (+1 eps) [worker 0] episodes_seen=420 last_return=-149.8 (+1 eps) [worker 3] 
episodes_seen=420 last_return=-203.1 (+1 eps) [worker 2] episodes_seen=420 last_return=-358.0 (+1 eps) [worker 1] episodes_seen=430 last_return=-191.6 (+1 eps) [worker 0] episodes_seen=430 last_return=-232.2 (+1 eps) [worker 3] episodes_seen=430 last_return=-192.4 (+1 eps) [worker 2] episodes_seen=430 last_return=-192.4 (+1 eps) [worker 0] episodes_seen=440 last_return=-158.9 (+1 eps) [worker 1] episodes_seen=440 last_return=-271.4 (+1 eps) [worker 3] episodes_seen=440 last_return=-234.2 (+1 eps) [worker 2] episodes_seen=440 last_return=-276.0 (+1 eps) [worker 0] episodes_seen=450 last_return=-349.4 (+1 eps) [worker 3] episodes_seen=450 last_return=-221.6 (+1 eps) [worker 2] episodes_seen=450 last_return=-294.7 (+1 eps) [worker 1] episodes_seen=450 last_return=-308.2 (+1 eps) [worker 0] episodes_seen=460 last_return=-312.2 (+1 eps) [A2C][sync] it= 1669 steps= 200280 (+120) avg10=-262.12 loss=4007.138 pg=0.001 vf=7285.703 H=0.009 gn=19656.203 [worker 3] episodes_seen=460 last_return=-285.0 (+1 eps) [worker 2] episodes_seen=460 last_return=-153.4 (+1 eps) [worker 1] episodes_seen=460 last_return=-284.1 (+1 eps) [worker 3] episodes_seen=470 last_return=-116.1 (+1 eps) [worker 0] episodes_seen=470 last_return=-250.1 (+1 eps) [worker 1] episodes_seen=470 last_return=-299.6 (+1 eps) [worker 2] episodes_seen=470 last_return=-244.1 (+1 eps) [worker 3] episodes_seen=480 last_return=-310.5 (+1 eps) [worker 1] episodes_seen=480 last_return=-137.1 (+1 eps) [worker 0] episodes_seen=480 last_return=-163.8 (+1 eps) [worker 2] episodes_seen=480 last_return=-323.1 (+1 eps) [worker 0] episodes_seen=490 last_return=-106.3 (+1 eps) [worker 3] episodes_seen=490 last_return=-246.2 (+1 eps) [worker 1] episodes_seen=490 last_return=-222.5 (+1 eps) [worker 2] episodes_seen=490 last_return=-407.2 (+1 eps) [worker 2] episodes_seen=500 last_return=-307.6 (+1 eps) [worker 0] episodes_seen=500 last_return=-528.7 (+1 eps) [worker 1] episodes_seen=500 last_return=-154.6 (+1 eps) [worker 3] 
episodes_seen=500 last_return=-290.0 (+1 eps) [worker 0] episodes_seen=510 last_return=-329.4 (+1 eps) [worker 2] episodes_seen=510 last_return=-160.1 (+1 eps) [worker 1] episodes_seen=510 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=510 last_return=-122.2 (+1 eps) [worker 2] episodes_seen=520 last_return=-205.1 (+1 eps) [worker 1] episodes_seen=520 last_return=-121.5 (+1 eps) [worker 0] episodes_seen=520 last_return=-370.3 (+1 eps) [worker 3] episodes_seen=520 last_return=-357.6 (+1 eps) [worker 1] episodes_seen=530 last_return=-262.0 (+1 eps) [worker 2] episodes_seen=530 last_return=-380.2 (+1 eps) [worker 0] episodes_seen=530 last_return=-250.8 (+1 eps) [worker 3] episodes_seen=530 last_return=-177.9 (+1 eps) [worker 1] episodes_seen=540 last_return=-258.1 (+1 eps) [worker 2] episodes_seen=540 last_return=-166.1 (+1 eps) [worker 0] episodes_seen=540 last_return=-181.1 (+1 eps) [worker 3] episodes_seen=540 last_return=-203.6 (+1 eps) [worker 1] episodes_seen=550 last_return=-108.9 (+1 eps) [worker 2] episodes_seen=550 last_return=-211.1 (+1 eps) [worker 3] episodes_seen=550 last_return=-286.6 (+1 eps) [worker 0] episodes_seen=550 last_return=-301.4 (+1 eps) [A2C][sync] it= 2086 steps= 250320 (+120) avg10=-228.38 loss=4933.538 pg=0.000 vf=8970.067 H=0.004 gn=45932.391 [worker 1] episodes_seen=560 last_return=-309.0 (+1 eps) [worker 2] episodes_seen=560 last_return=-184.2 (+1 eps) [worker 3] episodes_seen=560 last_return=-160.7 (+1 eps) [worker 1] episodes_seen=570 last_return=-210.2 (+1 eps) [worker 0] episodes_seen=560 last_return=-164.2 (+1 eps) [worker 2] episodes_seen=570 last_return=-280.6 (+1 eps) [worker 3] episodes_seen=570 last_return=-247.2 (+1 eps) [worker 0] episodes_seen=570 last_return=-278.4 (+1 eps) [worker 1] episodes_seen=580 last_return=-301.6 (+1 eps) [worker 2] episodes_seen=580 last_return=-240.3 (+1 eps) [worker 3] episodes_seen=580 last_return=-227.4 (+1 eps) [worker 0] episodes_seen=580 last_return=-206.6 (+1 eps) [worker 1] 
episodes_seen=590 last_return=-182.5 (+1 eps) [worker 3] episodes_seen=590 last_return=-227.7 (+1 eps) [worker 2] episodes_seen=590 last_return=-204.5 (+1 eps) [worker 0] episodes_seen=590 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=600 last_return=-195.4 (+1 eps) [worker 3] episodes_seen=600 last_return=-177.1 (+1 eps) [worker 2] episodes_seen=600 last_return=-211.9 (+1 eps) [worker 3] episodes_seen=610 last_return=-290.3 (+1 eps) [worker 2] episodes_seen=610 last_return=-225.8 (+1 eps) [worker 0] episodes_seen=600 last_return=-295.2 (+1 eps) [worker 1] episodes_seen=610 last_return=-166.6 (+1 eps) [worker 3] episodes_seen=620 last_return=-229.3 (+1 eps) [worker 1] episodes_seen=620 last_return=-343.5 (+1 eps) [worker 0] episodes_seen=610 last_return=-219.6 (+1 eps) [worker 2] episodes_seen=620 last_return=-227.5 (+1 eps) [worker 3] episodes_seen=630 last_return=-236.5 (+1 eps) [worker 1] episodes_seen=630 last_return=-125.6 (+1 eps) [worker 0] episodes_seen=620 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=630 last_return=-171.8 (+1 eps) [worker 3] episodes_seen=640 last_return=-175.2 (+1 eps) [worker 1] episodes_seen=640 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=630 last_return=-189.5 (+1 eps) [worker 2] episodes_seen=640 last_return=-268.9 (+1 eps) [worker 3] episodes_seen=650 last_return=-268.1 (+1 eps) [worker 1] episodes_seen=650 last_return=-283.1 (+1 eps) [worker 0] episodes_seen=640 last_return=-243.9 (+1 eps) [worker 2] episodes_seen=650 last_return=-269.6 (+1 eps) [worker 3] episodes_seen=660 last_return=-333.4 (+1 eps) [A2C][sync] it= 2503 steps= 300360 (+120) avg10=-224.61 loss=3864.400 pg=0.001 vf=7026.181 H=0.013 gn=22156.354 [worker 1] episodes_seen=660 last_return=-156.1 (+1 eps) [worker 0] episodes_seen=650 last_return=-353.4 (+1 eps) [worker 2] episodes_seen=660 last_return=-369.6 (+1 eps) [worker 3] episodes_seen=670 last_return=-197.3 (+1 eps) [worker 1] episodes_seen=670 last_return=-232.1 (+1 eps) [worker 2] 
episodes_seen=670 last_return=-217.7 (+1 eps) [worker 3] episodes_seen=680 last_return=-229.4 (+1 eps) [worker 0] episodes_seen=660 last_return=-250.9 (+1 eps) [worker 1] episodes_seen=680 last_return=-252.4 (+1 eps) [worker 2] episodes_seen=680 last_return=-212.8 (+1 eps) [worker 3] episodes_seen=690 last_return=-199.1 (+1 eps) [worker 0] episodes_seen=670 last_return=-439.5 (+1 eps) [worker 2] episodes_seen=690 last_return=-287.8 (+1 eps) [worker 1] episodes_seen=690 last_return=-274.7 (+1 eps) [worker 3] episodes_seen=700 last_return=-129.2 (+1 eps) [worker 2] episodes_seen=700 last_return=-106.9 (+1 eps) [worker 0] episodes_seen=680 last_return=-231.6 (+1 eps) [worker 1] episodes_seen=700 last_return=-156.2 (+1 eps) [worker 3] episodes_seen=710 last_return=-239.8 (+1 eps) [worker 2] episodes_seen=710 last_return=-243.8 (+1 eps) [worker 0] episodes_seen=690 last_return=-128.9 (+1 eps) [worker 3] episodes_seen=720 last_return=-135.3 (+1 eps) [worker 1] episodes_seen=710 last_return=-307.5 (+1 eps) [worker 2] episodes_seen=720 last_return=-339.0 (+1 eps) [worker 0] episodes_seen=700 last_return=-246.8 (+1 eps) [worker 1] episodes_seen=720 last_return=-189.3 (+1 eps) [worker 2] episodes_seen=730 last_return=-160.1 (+1 eps) [worker 3] episodes_seen=730 last_return=-255.6 (+1 eps) [worker 0] episodes_seen=710 last_return=-202.8 (+1 eps) [worker 1] episodes_seen=730 last_return=-317.9 (+1 eps) [worker 2] episodes_seen=740 last_return=-230.2 (+1 eps) [worker 3] episodes_seen=740 last_return=-116.4 (+1 eps) [worker 0] episodes_seen=720 last_return=-304.1 (+1 eps) [worker 1] episodes_seen=740 last_return=-177.3 (+1 eps) [worker 2] episodes_seen=750 last_return=-292.4 (+1 eps) [worker 3] episodes_seen=750 last_return=-175.4 (+1 eps) [worker 0] episodes_seen=730 last_return=-257.3 (+1 eps) [worker 1] episodes_seen=750 last_return=-107.7 (+1 eps) [worker 3] episodes_seen=760 last_return=-253.0 (+1 eps) [A2C][sync] it= 2920 steps= 350400 (+120) avg10=-241.13 loss=44646.555 
pg=-0.002 vf=81175.555 H=0.029 gn=36733.641 [worker 2] episodes_seen=760 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=740 last_return=-295.6 (+1 eps) [worker 1] episodes_seen=760 last_return=-416.6 (+1 eps) [worker 0] episodes_seen=750 last_return=-337.1 (+1 eps) [worker 2] episodes_seen=770 last_return=-290.5 (+1 eps) [worker 3] episodes_seen=770 last_return=-355.5 (+1 eps) [worker 1] episodes_seen=770 last_return=-216.0 (+1 eps) [worker 0] episodes_seen=760 last_return=-283.4 (+1 eps) [worker 2] episodes_seen=780 last_return=-117.4 (+1 eps) [worker 3] episodes_seen=780 last_return=-316.9 (+1 eps) [worker 1] episodes_seen=780 last_return=-246.2 (+1 eps) [worker 0] episodes_seen=770 last_return=-256.2 (+1 eps) [worker 3] episodes_seen=790 last_return=-136.0 (+1 eps) [worker 2] episodes_seen=790 last_return=-284.3 (+1 eps) [worker 1] episodes_seen=790 last_return=-359.9 (+1 eps) [worker 2] episodes_seen=800 last_return=-112.4 (+1 eps) [worker 3] episodes_seen=800 last_return=-152.4 (+1 eps) [worker 0] episodes_seen=780 last_return=-162.6 (+1 eps) [worker 0] episodes_seen=790 last_return=-237.7 (+1 eps) [worker 1] episodes_seen=800 last_return=-282.2 (+1 eps) [worker 3] episodes_seen=810 last_return=-260.0 (+1 eps) [worker 2] episodes_seen=810 last_return=-183.3 (+1 eps) [worker 0] episodes_seen=800 last_return=-91.1 (+1 eps) [worker 1] episodes_seen=810 last_return=-88.8 (+1 eps) [worker 2] episodes_seen=820 last_return=-125.3 (+1 eps) [worker 3] episodes_seen=820 last_return=-61.4 (+1 eps) [worker 0] episodes_seen=810 last_return=-93.7 (+1 eps) [worker 1] episodes_seen=820 last_return=-131.1 (+1 eps) [worker 3] episodes_seen=830 last_return=-66.3 (+1 eps) [worker 2] episodes_seen=830 last_return=-75.1 (+1 eps) [worker 0] episodes_seen=820 last_return=-65.3 (+1 eps) [worker 1] episodes_seen=830 last_return=-93.5 (+1 eps) [worker 3] episodes_seen=840 last_return=-131.9 (+1 eps) [worker 2] episodes_seen=840 last_return=-96.0 (+1 eps) [worker 1] 
episodes_seen=840 last_return=-63.9 (+1 eps) [worker 0] episodes_seen=830 last_return=-88.1 (+1 eps) [worker 3] episodes_seen=850 last_return=-89.8 (+1 eps) [worker 2] episodes_seen=850 last_return=-100.1 (+1 eps) [worker 0] episodes_seen=840 last_return=-75.4 (+1 eps) [worker 3] episodes_seen=860 last_return=-60.5 (+1 eps) [worker 1] episodes_seen=850 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=860 last_return=-78.8 (+1 eps) [worker 3] episodes_seen=870 last_return=-208.7 (+1 eps) [worker 0] episodes_seen=850 last_return=-118.1 (+1 eps) [worker 1] episodes_seen=860 last_return=-85.9 (+1 eps) [worker 2] episodes_seen=870 last_return=-119.9 (+1 eps) [worker 0] episodes_seen=860 last_return=-93.9 (+1 eps) [worker 3] episodes_seen=880 last_return=-56.0 (+1 eps) [worker 1] episodes_seen=870 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=880 last_return=-232.0 (+1 eps) [worker 3] episodes_seen=890 last_return=-55.8 (+1 eps) [worker 0] episodes_seen=870 last_return=-137.9 (+1 eps) [worker 1] episodes_seen=880 last_return=-75.2 (+1 eps) [worker 2] episodes_seen=890 last_return=-37.4 (+1 eps) [A2C][sync] it= 3337 steps= 400440 (+120) avg10=-224.93 loss=2688.637 pg=0.068 vf=4888.324 H=0.846 gn=8005.894 [worker 3] episodes_seen=900 last_return=-281.2 (+1 eps) [worker 0] episodes_seen=880 last_return=-226.7 (+1 eps) [worker 1] episodes_seen=890 last_return=-320.2 (+1 eps) [worker 2] episodes_seen=900 last_return=-135.1 (+1 eps) [worker 0] episodes_seen=890 last_return=-196.0 (+1 eps) [worker 3] episodes_seen=910 last_return=-120.9 (+1 eps) [worker 1] episodes_seen=900 last_return=-132.3 (+1 eps) [worker 2] episodes_seen=910 last_return=-92.7 (+1 eps) [worker 3] episodes_seen=920 last_return=-117.2 (+1 eps) [worker 0] episodes_seen=900 last_return=-86.2 (+1 eps) [worker 1] episodes_seen=910 last_return=-87.4 (+1 eps) [worker 2] episodes_seen=920 last_return=-103.3 (+1 eps) [worker 3] episodes_seen=930 last_return=-81.3 (+1 eps) [worker 1] episodes_seen=920 
last_return=-96.3 (+1 eps) [worker 0] episodes_seen=910 last_return=-136.8 (+1 eps) [worker 2] episodes_seen=930 last_return=-79.0 (+1 eps) [worker 3] episodes_seen=940 last_return=-109.1 (+1 eps) [worker 1] episodes_seen=930 last_return=-86.5 (+1 eps) [worker 0] episodes_seen=920 last_return=-58.0 (+1 eps) [worker 2] episodes_seen=940 last_return=-90.3 (+1 eps) [worker 3] episodes_seen=950 last_return=-210.2 (+1 eps) [worker 0] episodes_seen=930 last_return=-284.2 (+1 eps) [worker 1] episodes_seen=940 last_return=-117.3 (+1 eps) [worker 2] episodes_seen=950 last_return=-165.3 (+1 eps) [worker 1] episodes_seen=950 last_return=-106.5 (+1 eps) [worker 0] episodes_seen=940 last_return=-99.9 (+1 eps) [worker 3] episodes_seen=960 last_return=-232.3 (+1 eps) [worker 2] episodes_seen=960 last_return=-122.8 (+1 eps) [worker 1] episodes_seen=960 last_return=-75.5 (+1 eps) [worker 0] episodes_seen=950 last_return=-67.7 (+1 eps) [worker 3] episodes_seen=970 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=970 last_return=-119.8 (+1 eps) [worker 3] episodes_seen=980 last_return=-99.4 (+1 eps) [worker 1] episodes_seen=970 last_return=-127.3 (+1 eps) [worker 0] episodes_seen=960 last_return=-97.3 (+1 eps) [worker 2] episodes_seen=980 last_return=-139.4 (+1 eps) [worker 3] episodes_seen=990 last_return=-91.3 (+1 eps) [worker 0] episodes_seen=970 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=980 last_return=-85.0 (+1 eps) [worker 2] episodes_seen=990 last_return=-91.4 (+1 eps) [worker 3] episodes_seen=1000 last_return=-118.7 (+1 eps) [worker 0] episodes_seen=980 last_return=-112.0 (+1 eps) [worker 1] episodes_seen=990 last_return=-82.5 (+1 eps) [worker 2] episodes_seen=1000 last_return=-57.8 (+1 eps) [worker 3] episodes_seen=1010 last_return=-83.7 (+1 eps) [worker 0] episodes_seen=990 last_return=-94.9 (+1 eps) [worker 1] episodes_seen=1000 last_return=-95.0 (+1 eps) [worker 2] episodes_seen=1010 last_return=-81.7 (+1 eps) [worker 3] episodes_seen=1020 
last_return=-130.4 (+1 eps) [worker 0] episodes_seen=1000 last_return=-158.8 (+1 eps) [worker 1] episodes_seen=1010 last_return=-101.0 (+1 eps) [worker 2] episodes_seen=1020 last_return=-152.3 (+1 eps) [worker 0] episodes_seen=1010 last_return=-147.5 (+1 eps) [A2C][sync] it= 3754 steps= 450480 (+120) avg10=-146.24 loss=235.020 pg=-0.006 vf=427.335 H=0.812 gn=336.276 [worker 2] episodes_seen=1030 last_return=-87.9 (+1 eps) [worker 1] episodes_seen=1020 last_return=-107.8 (+1 eps) [worker 3] episodes_seen=1030 last_return=-98.9 (+1 eps) [worker 0] episodes_seen=1020 last_return=-87.0 (+1 eps) [worker 1] episodes_seen=1030 last_return=-53.9 (+1 eps) [worker 3] episodes_seen=1040 last_return=-88.5 (+1 eps) [worker 2] episodes_seen=1040 last_return=-122.1 (+1 eps) [worker 1] episodes_seen=1040 last_return=-109.7 (+1 eps) [worker 0] episodes_seen=1030 last_return=-100.2 (+1 eps) [worker 3] episodes_seen=1050 last_return=-134.9 (+1 eps) [worker 2] episodes_seen=1050 last_return=-164.1 (+1 eps) [worker 1] episodes_seen=1050 last_return=-84.8 (+1 eps) [worker 0] episodes_seen=1040 last_return=-55.4 (+1 eps) [worker 3] episodes_seen=1060 last_return=-93.1 (+1 eps) [worker 2] episodes_seen=1060 last_return=-79.2 (+1 eps) [worker 1] episodes_seen=1060 last_return=-170.5 (+1 eps) [worker 0] episodes_seen=1050 last_return=-121.7 (+1 eps) [worker 2] episodes_seen=1070 last_return=-56.7 (+1 eps) [worker 3] episodes_seen=1070 last_return=-89.0 (+1 eps) [worker 1] episodes_seen=1070 last_return=-83.5 (+1 eps) [worker 2] episodes_seen=1080 last_return=-66.0 (+1 eps) [worker 0] episodes_seen=1060 last_return=-82.2 (+1 eps) [worker 3] episodes_seen=1080 last_return=-92.6 (+1 eps) [worker 1] episodes_seen=1080 last_return=-252.4 (+1 eps) [worker 0] episodes_seen=1070 last_return=-304.2 (+1 eps) [worker 2] episodes_seen=1090 last_return=-196.6 (+1 eps) [worker 3] episodes_seen=1090 last_return=-307.0 (+1 eps) [worker 1] episodes_seen=1090 last_return=-166.4 (+1 eps) [worker 0] 
episodes_seen=1080 last_return=-128.5 (+1 eps) [worker 2] episodes_seen=1100 last_return=-131.2 (+1 eps) [worker 3] episodes_seen=1100 last_return=-394.1 (+1 eps) [worker 1] episodes_seen=1100 last_return=-182.3 (+1 eps) [worker 2] episodes_seen=1110 last_return=-261.8 (+1 eps) [worker 0] episodes_seen=1090 last_return=-194.2 (+1 eps) [worker 3] episodes_seen=1110 last_return=-107.9 (+1 eps) [worker 1] episodes_seen=1110 last_return=-236.0 (+1 eps) [worker 2] episodes_seen=1120 last_return=-104.8 (+1 eps) [worker 0] episodes_seen=1100 last_return=-298.6 (+1 eps) [worker 3] episodes_seen=1120 last_return=-289.1 (+1 eps) [worker 1] episodes_seen=1120 last_return=-217.7 (+1 eps) [worker 2] episodes_seen=1130 last_return=-289.6 (+1 eps) [worker 0] episodes_seen=1110 last_return=-290.2 (+1 eps) [worker 3] episodes_seen=1130 last_return=-305.0 (+1 eps) [worker 1] episodes_seen=1130 last_return=-258.0 (+1 eps) [worker 2] episodes_seen=1140 last_return=-219.5 (+1 eps) [worker 0] episodes_seen=1120 last_return=-403.9 (+1 eps) [A2C][sync] it= 4171 steps= 500520 (+120) avg10=-269.09 loss=70714.820 pg=-0.015 vf=128572.430 H=0.065 gn=86541.086 [worker 3] episodes_seen=1140 last_return=-219.9 (+1 eps) [worker 2] episodes_seen=1150 last_return=-237.2 (+1 eps) [worker 1] episodes_seen=1140 last_return=-161.4 (+1 eps) [worker 0] episodes_seen=1130 last_return=-193.3 (+1 eps) [worker 3] episodes_seen=1150 last_return=-310.9 (+1 eps) [worker 2] episodes_seen=1160 last_return=-312.9 (+1 eps) [worker 1] episodes_seen=1150 last_return=-209.7 (+1 eps) [worker 0] episodes_seen=1140 last_return=-179.9 (+1 eps) [worker 3] episodes_seen=1160 last_return=-121.0 (+1 eps) [worker 2] episodes_seen=1170 last_return=-243.5 (+1 eps) [worker 1] episodes_seen=1160 last_return=-131.5 (+1 eps) [worker 0] episodes_seen=1150 last_return=-223.7 (+1 eps) [worker 2] episodes_seen=1180 last_return=-158.9 (+1 eps) [worker 3] episodes_seen=1170 last_return=-243.1 (+1 eps) [worker 1] episodes_seen=1170 
last_return=-183.3 (+1 eps) [worker 0] episodes_seen=1160 last_return=-171.6 (+1 eps) [worker 2] episodes_seen=1190 last_return=-316.1 (+1 eps) [worker 3] episodes_seen=1180 last_return=-167.8 (+1 eps) [worker 1] episodes_seen=1180 last_return=-231.5 (+1 eps) [worker 0] episodes_seen=1170 last_return=-222.1 (+1 eps) [worker 2] episodes_seen=1200 last_return=-324.7 (+1 eps) [worker 3] episodes_seen=1190 last_return=-107.9 (+1 eps) [worker 1] episodes_seen=1190 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1180 last_return=-97.9 (+1 eps) [worker 3] episodes_seen=1200 last_return=-89.1 (+1 eps) [worker 2] episodes_seen=1210 last_return=-96.5 (+1 eps) [worker 1] episodes_seen=1200 last_return=-100.5 (+1 eps) [worker 0] episodes_seen=1190 last_return=-83.8 (+1 eps) [worker 3] episodes_seen=1210 last_return=-82.4 (+1 eps) [worker 1] episodes_seen=1210 last_return=-93.3 (+1 eps) [worker 0] episodes_seen=1200 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1220 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=1220 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1220 last_return=-95.4 (+1 eps) [worker 0] episodes_seen=1210 last_return=-96.9 (+1 eps) [worker 2] episodes_seen=1230 last_return=-86.3 (+1 eps) [worker 3] episodes_seen=1230 last_return=-44.7 (+1 eps) [worker 1] episodes_seen=1230 last_return=-85.5 (+1 eps) [worker 0] episodes_seen=1220 last_return=-32.2 (+1 eps) [worker 2] episodes_seen=1240 last_return=-84.6 (+1 eps) [worker 1] episodes_seen=1240 last_return=-63.9 (+1 eps) [worker 3] episodes_seen=1240 last_return=-59.5 (+1 eps) [worker 0] episodes_seen=1230 last_return=-365.6 (+1 eps) [worker 2] episodes_seen=1250 last_return=-71.2 (+1 eps) [worker 1] episodes_seen=1250 last_return=-112.7 (+1 eps) [worker 3] episodes_seen=1250 last_return=-296.3 (+1 eps) [worker 2] episodes_seen=1260 last_return=-65.6 (+1 eps) [worker 0] episodes_seen=1240 last_return=-121.4 (+1 eps) [A2C][sync] it= 4588 steps= 550560 (+120) avg10= -89.04 loss=115.294 
pg=0.051 vf=209.551 H=1.038 gn=645.854 [worker 1] episodes_seen=1260 last_return=-211.5 (+1 eps) [worker 3] episodes_seen=1260 last_return=-253.8 (+1 eps) [worker 0] episodes_seen=1250 last_return=-193.2 (+1 eps) [worker 2] episodes_seen=1270 last_return=-120.3 (+1 eps) [worker 1] episodes_seen=1270 last_return=-93.6 (+1 eps) [worker 3] episodes_seen=1270 last_return=-86.5 (+1 eps) [worker 0] episodes_seen=1260 last_return=-91.4 (+1 eps) [worker 2] episodes_seen=1280 last_return=-148.5 (+1 eps) [worker 1] episodes_seen=1280 last_return=-90.5 (+1 eps) [worker 3] episodes_seen=1280 last_return=-114.3 (+1 eps) [worker 0] episodes_seen=1270 last_return=-137.2 (+1 eps) [worker 2] episodes_seen=1290 last_return=-92.9 (+1 eps) [worker 1] episodes_seen=1290 last_return=-171.4 (+1 eps) [worker 0] episodes_seen=1280 last_return=-133.1 (+1 eps) [worker 3] episodes_seen=1290 last_return=-84.4 (+1 eps) [worker 2] episodes_seen=1300 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1300 last_return=-90.3 (+1 eps) [worker 0] episodes_seen=1290 last_return=-137.1 (+1 eps) [worker 3] episodes_seen=1300 last_return=-85.8 (+1 eps) [worker 2] episodes_seen=1310 last_return=-306.4 (+1 eps) [worker 1] episodes_seen=1310 last_return=-132.3 (+1 eps) [worker 2] episodes_seen=1320 last_return=-188.8 (+1 eps) [worker 0] episodes_seen=1300 last_return=2.6 (+1 eps) [worker 3] episodes_seen=1310 last_return=-156.2 (+1 eps) [worker 1] episodes_seen=1320 last_return=-33.2 (+1 eps) [worker 2] episodes_seen=1330 last_return=-105.9 (+1 eps) [worker 0] episodes_seen=1310 last_return=-105.0 (+1 eps) [worker 3] episodes_seen=1320 last_return=-87.0 (+1 eps) [worker 1] episodes_seen=1330 last_return=-88.1 (+1 eps) [worker 2] episodes_seen=1340 last_return=-148.3 (+1 eps) [worker 3] episodes_seen=1330 last_return=-121.2 (+1 eps) [worker 0] episodes_seen=1320 last_return=-92.4 (+1 eps) [worker 1] episodes_seen=1340 last_return=82.1 (+1 eps) [worker 2] episodes_seen=1350 last_return=-105.8 (+1 eps) 
[worker 3] episodes_seen=1340 last_return=-168.2 (+1 eps) [worker 0] episodes_seen=1330 last_return=-215.2 (+1 eps) [worker 1] episodes_seen=1350 last_return=-117.5 (+1 eps) [worker 2] episodes_seen=1360 last_return=-93.6 (+1 eps) [worker 3] episodes_seen=1350 last_return=-174.6 (+1 eps) [worker 0] episodes_seen=1340 last_return=-65.2 (+1 eps) [worker 1] episodes_seen=1360 last_return=-92.1 (+1 eps) [worker 2] episodes_seen=1370 last_return=-81.2 (+1 eps) [worker 3] episodes_seen=1360 last_return=-110.2 (+1 eps) [worker 0] episodes_seen=1350 last_return=-138.1 (+1 eps) [worker 1] episodes_seen=1370 last_return=-69.8 (+1 eps) [worker 2] episodes_seen=1380 last_return=-86.0 (+1 eps) [worker 3] episodes_seen=1370 last_return=-98.8 (+1 eps) [worker 0] episodes_seen=1360 last_return=-120.3 (+1 eps) [worker 1] episodes_seen=1380 last_return=-115.7 (+1 eps) [worker 2] episodes_seen=1390 last_return=-173.4 (+1 eps) [worker 3] episodes_seen=1380 last_return=-104.0 (+1 eps) [worker 1] episodes_seen=1390 last_return=-126.0 (+1 eps) [A2C][sync] it= 5005 steps= 600600 (+120) avg10=-113.10 loss=59.692 pg=-0.007 vf=108.560 H=0.970 gn=454.206 [worker 2] episodes_seen=1400 last_return=-155.4 (+1 eps) [worker 0] episodes_seen=1370 last_return=-123.6 (+1 eps) [worker 3] episodes_seen=1390 last_return=-155.7 (+1 eps) [worker 1] episodes_seen=1400 last_return=-78.6 (+1 eps) [worker 2] episodes_seen=1410 last_return=-121.2 (+1 eps) [worker 3] episodes_seen=1400 last_return=-70.1 (+1 eps) [worker 0] episodes_seen=1380 last_return=-84.7 (+1 eps) [worker 1] episodes_seen=1410 last_return=-114.6 (+1 eps) [worker 2] episodes_seen=1420 last_return=-118.1 (+1 eps) [worker 3] episodes_seen=1410 last_return=-90.4 (+1 eps) [worker 0] episodes_seen=1390 last_return=-224.7 (+1 eps) [worker 1] episodes_seen=1420 last_return=-213.1 (+1 eps) [worker 2] episodes_seen=1430 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=1420 last_return=-262.0 (+1 eps) [worker 0] episodes_seen=1400 
last_return=-129.9 (+1 eps) [worker 1] episodes_seen=1430 last_return=-195.3 (+1 eps) [worker 2] episodes_seen=1440 last_return=-345.5 (+1 eps) [worker 3] episodes_seen=1430 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1440 last_return=-256.3 (+1 eps) [worker 0] episodes_seen=1410 last_return=-214.1 (+1 eps) [worker 2] episodes_seen=1450 last_return=-128.2 (+1 eps) [worker 3] episodes_seen=1440 last_return=-59.6 (+1 eps) [worker 0] episodes_seen=1420 last_return=-73.9 (+1 eps) [worker 1] episodes_seen=1450 last_return=-120.5 (+1 eps) [worker 2] episodes_seen=1460 last_return=-78.1 (+1 eps) [worker 3] episodes_seen=1450 last_return=-115.9 (+1 eps) [worker 0] episodes_seen=1430 last_return=-118.8 (+1 eps) [worker 1] episodes_seen=1460 last_return=-107.8 (+1 eps) [worker 2] episodes_seen=1470 last_return=-108.2 (+1 eps) [worker 3] episodes_seen=1460 last_return=-114.3 (+1 eps) [worker 1] episodes_seen=1470 last_return=-135.1 (+1 eps) [worker 0] episodes_seen=1440 last_return=-93.1 (+1 eps) [worker 2] episodes_seen=1480 last_return=-84.9 (+1 eps) [worker 3] episodes_seen=1470 last_return=-69.0 (+1 eps) [worker 0] episodes_seen=1450 last_return=-79.2 (+1 eps) [worker 2] episodes_seen=1490 last_return=-114.6 (+1 eps) [worker 3] episodes_seen=1480 last_return=-79.9 (+1 eps) [worker 0] episodes_seen=1460 last_return=-223.4 (+1 eps) [worker 1] episodes_seen=1480 last_return=-99.9 (+1 eps) [worker 1] episodes_seen=1490 last_return=-86.1 (+1 eps) [worker 3] episodes_seen=1490 last_return=-74.9 (+1 eps) [worker 0] episodes_seen=1470 last_return=-100.4 (+1 eps) [worker 2] episodes_seen=1500 last_return=-118.8 (+1 eps) [A2C][sync] it= 5422 steps= 650640 (+120) avg10=-115.52 loss=74.862 pg=0.051 vf=136.038 H=0.993 gn=190.492 [worker 1] episodes_seen=1500 last_return=-77.8 (+1 eps) [worker 0] episodes_seen=1480 last_return=-105.9 (+1 eps) [worker 3] episodes_seen=1500 last_return=-69.3 (+1 eps) [worker 2] episodes_seen=1510 last_return=-80.3 (+1 eps) [worker 1] 
episodes_seen=1510 last_return=-131.8 (+1 eps) [worker 3] episodes_seen=1510 last_return=-100.3 (+1 eps) [worker 0] episodes_seen=1490 last_return=-101.8 (+1 eps) [worker 2] episodes_seen=1520 last_return=-185.1 (+1 eps) [worker 1] episodes_seen=1520 last_return=-139.4 (+1 eps) [worker 3] episodes_seen=1520 last_return=-75.6 (+1 eps) [worker 0] episodes_seen=1500 last_return=-370.5 (+1 eps) [worker 2] episodes_seen=1530 last_return=-198.7 (+1 eps) [worker 1] episodes_seen=1530 last_return=-129.0 (+1 eps) [worker 3] episodes_seen=1530 last_return=-224.0 (+1 eps) [worker 0] episodes_seen=1510 last_return=-200.1 (+1 eps) [worker 2] episodes_seen=1540 last_return=-240.3 (+1 eps) [worker 1] episodes_seen=1540 last_return=-155.2 (+1 eps) [worker 3] episodes_seen=1540 last_return=-315.5 (+1 eps) [worker 0] episodes_seen=1520 last_return=-150.4 (+1 eps) [worker 2] episodes_seen=1550 last_return=-175.2 (+1 eps) [worker 1] episodes_seen=1550 last_return=-171.0 (+1 eps) [worker 3] episodes_seen=1550 last_return=-188.0 (+1 eps) [worker 0] episodes_seen=1530 last_return=-175.7 (+1 eps) [worker 2] episodes_seen=1560 last_return=-151.1 (+1 eps) [worker 1] episodes_seen=1560 last_return=-231.1 (+1 eps) [worker 3] episodes_seen=1560 last_return=-190.3 (+1 eps) [worker 0] episodes_seen=1540 last_return=-162.3 (+1 eps) [worker 2] episodes_seen=1570 last_return=-133.2 (+1 eps) [worker 1] episodes_seen=1570 last_return=-334.6 (+1 eps) [worker 3] episodes_seen=1570 last_return=-169.4 (+1 eps) [worker 0] episodes_seen=1550 last_return=-146.3 (+1 eps) [worker 2] episodes_seen=1580 last_return=-177.5 (+1 eps) [worker 1] episodes_seen=1580 last_return=-203.8 (+1 eps) [worker 3] episodes_seen=1580 last_return=-217.3 (+1 eps) [worker 0] episodes_seen=1560 last_return=-173.5 (+1 eps) [worker 2] episodes_seen=1590 last_return=-94.8 (+1 eps) [worker 1] episodes_seen=1590 last_return=-72.5 (+1 eps) [worker 3] episodes_seen=1590 last_return=-68.8 (+1 eps) [worker 0] episodes_seen=1570 
last_return=-87.2 (+1 eps) [worker 2] episodes_seen=1600 last_return=-112.3 (+1 eps) [worker 1] episodes_seen=1600 last_return=-109.9 (+1 eps) [worker 3] episodes_seen=1600 last_return=-101.4 (+1 eps) [worker 0] episodes_seen=1580 last_return=-33.9 (+1 eps) [worker 2] episodes_seen=1610 last_return=69.1 (+1 eps) [worker 1] episodes_seen=1610 last_return=23.0 (+1 eps) [worker 3] episodes_seen=1610 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1590 last_return=-89.5 (+1 eps) [worker 2] episodes_seen=1620 last_return=-288.9 (+1 eps) [A2C][sync] it= 5839 steps= 700680 (+120) avg10=-189.22 loss=1393.406 pg=-0.005 vf=2533.490 H=0.797 gn=2142.638 [worker 1] episodes_seen=1620 last_return=-124.7 (+1 eps) [worker 3] episodes_seen=1620 last_return=-83.1 (+1 eps) [worker 0] episodes_seen=1600 last_return=-128.9 (+1 eps) [worker 2] episodes_seen=1630 last_return=-118.6 (+1 eps) [worker 1] episodes_seen=1630 last_return=-137.4 (+1 eps) [worker 3] episodes_seen=1630 last_return=-324.2 (+1 eps) [worker 0] episodes_seen=1610 last_return=-353.5 (+1 eps) [worker 2] episodes_seen=1640 last_return=-173.8 (+1 eps) [worker 1] episodes_seen=1640 last_return=-133.7 (+1 eps) [worker 3] episodes_seen=1640 last_return=-322.6 (+1 eps) [worker 0] episodes_seen=1620 last_return=-345.5 (+1 eps) [worker 2] episodes_seen=1650 last_return=-212.9 (+1 eps) [worker 1] episodes_seen=1650 last_return=-112.4 (+1 eps) [worker 3] episodes_seen=1650 last_return=-302.7 (+1 eps) [worker 0] episodes_seen=1630 last_return=-78.8 (+1 eps) [worker 2] episodes_seen=1660 last_return=-315.1 (+1 eps) [worker 3] episodes_seen=1660 last_return=-171.6 (+1 eps) [worker 1] episodes_seen=1660 last_return=-222.1 (+1 eps) [worker 0] episodes_seen=1640 last_return=-152.7 (+1 eps) [worker 2] episodes_seen=1670 last_return=-151.1 (+1 eps) [worker 1] episodes_seen=1670 last_return=-254.3 (+1 eps) [worker 3] episodes_seen=1670 last_return=-133.0 (+1 eps) [worker 0] episodes_seen=1650 last_return=-135.7 (+1 eps) [worker 2] 
episodes_seen=1680 last_return=-90.6 (+1 eps) [worker 1] episodes_seen=1680 last_return=-110.0 (+1 eps) [worker 3] episodes_seen=1680 last_return=-247.2 (+1 eps) [worker 0] episodes_seen=1660 last_return=-104.2 (+1 eps) [worker 2] episodes_seen=1690 last_return=-107.7 (+1 eps) [worker 0] episodes_seen=1670 last_return=-122.6 (+1 eps) [worker 1] episodes_seen=1690 last_return=-84.2 (+1 eps) [worker 3] episodes_seen=1690 last_return=-135.6 (+1 eps) [worker 2] episodes_seen=1700 last_return=-149.5 (+1 eps) [worker 0] episodes_seen=1680 last_return=-184.6 (+1 eps) [worker 1] episodes_seen=1700 last_return=-147.7 (+1 eps) [worker 3] episodes_seen=1700 last_return=-173.6 (+1 eps) [worker 2] episodes_seen=1710 last_return=-175.9 (+1 eps) [worker 1] episodes_seen=1710 last_return=-78.5 (+1 eps) [worker 3] episodes_seen=1710 last_return=-77.7 (+1 eps) [worker 0] episodes_seen=1690 last_return=-111.6 (+1 eps) [worker 2] episodes_seen=1720 last_return=-93.1 (+1 eps) [worker 1] episodes_seen=1720 last_return=-100.5 (+1 eps) [worker 3] episodes_seen=1720 last_return=-73.2 (+1 eps) [worker 0] episodes_seen=1700 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1730 last_return=-86.5 (+1 eps) [worker 1] episodes_seen=1730 last_return=-60.2 (+1 eps) [worker 3] episodes_seen=1730 last_return=-236.6 (+1 eps) [worker 0] episodes_seen=1710 last_return=-79.1 (+1 eps) [worker 2] episodes_seen=1740 last_return=-100.8 (+1 eps) [worker 3] episodes_seen=1740 last_return=-47.2 (+1 eps) [worker 1] episodes_seen=1740 last_return=-69.4 (+1 eps) [worker 0] episodes_seen=1720 last_return=-81.1 (+1 eps) [worker 2] episodes_seen=1750 last_return=-17.6 (+1 eps) [worker 3] episodes_seen=1750 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1750 last_return=-114.1 (+1 eps) [worker 0] episodes_seen=1730 last_return=-91.0 (+1 eps) [worker 2] episodes_seen=1760 last_return=-168.7 (+1 eps) [worker 3] episodes_seen=1760 last_return=-98.4 (+1 eps) [worker 0] episodes_seen=1740 last_return=-102.3 (+1 
eps) [worker 1] episodes_seen=1760 last_return=-99.3 (+1 eps) [A2C][sync] it= 6256 steps= 750720 (+120) avg10=-170.28 loss=4812.966 pg=-0.020 vf=8750.885 H=0.113 gn=18739.896 [worker 2] episodes_seen=1770 last_return=-234.8 (+1 eps) [worker 3] episodes_seen=1770 last_return=-415.2 (+1 eps) [worker 0] episodes_seen=1750 last_return=-156.5 (+1 eps) [worker 1] episodes_seen=1770 last_return=-317.6 (+1 eps) [worker 2] episodes_seen=1780 last_return=-119.6 (+1 eps) [worker 0] episodes_seen=1760 last_return=-111.0 (+1 eps) [worker 3] episodes_seen=1780 last_return=-320.9 (+1 eps) [worker 1] episodes_seen=1780 last_return=-86.2 (+1 eps) [worker 2] episodes_seen=1790 last_return=-108.4 (+1 eps) [worker 0] episodes_seen=1770 last_return=-227.4 (+1 eps) [worker 3] episodes_seen=1790 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1790 last_return=-251.2 (+1 eps) [worker 2] episodes_seen=1800 last_return=-231.1 (+1 eps) [worker 0] episodes_seen=1780 last_return=41.9 (+1 eps) [worker 3] episodes_seen=1800 last_return=-78.0 (+1 eps) [worker 1] episodes_seen=1800 last_return=-124.5 (+1 eps) [worker 2] episodes_seen=1810 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1790 last_return=-79.7 (+1 eps) [worker 3] episodes_seen=1810 last_return=-91.7 (+1 eps) [worker 1] episodes_seen=1810 last_return=-96.8 (+1 eps) [worker 2] episodes_seen=1820 last_return=-105.6 (+1 eps) [worker 0] episodes_seen=1800 last_return=-101.4 (+1 eps) [worker 3] episodes_seen=1820 last_return=-168.4 (+1 eps) [worker 1] episodes_seen=1820 last_return=-95.8 (+1 eps) [worker 2] episodes_seen=1830 last_return=-105.5 (+1 eps) [worker 0] episodes_seen=1810 last_return=-85.1 (+1 eps) [worker 3] episodes_seen=1830 last_return=-29.3 (+1 eps) [worker 2] episodes_seen=1840 last_return=-93.0 (+1 eps) [worker 1] episodes_seen=1830 last_return=-98.2 (+1 eps) [worker 3] episodes_seen=1840 last_return=-112.8 (+1 eps) [worker 0] episodes_seen=1820 last_return=-89.0 (+1 eps) [worker 2] episodes_seen=1850 
last_return=-98.5 (+1 eps) [worker 1] episodes_seen=1840 last_return=-117.9 (+1 eps) [worker 0] episodes_seen=1830 last_return=-87.8 (+1 eps) [worker 3] episodes_seen=1850 last_return=-56.5 (+1 eps) [worker 2] episodes_seen=1860 last_return=-283.0 (+1 eps) [worker 0] episodes_seen=1840 last_return=-91.4 (+1 eps) [worker 3] episodes_seen=1860 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1870 last_return=-61.8 (+1 eps) [worker 1] episodes_seen=1850 last_return=-183.0 (+1 eps) [worker 3] episodes_seen=1870 last_return=-68.5 (+1 eps) [A2C][sync] it= 6673 steps= 800760 (+120) avg10= -67.67 loss=826.876 pg=-0.015 vf=1503.457 H=1.046 gn=2371.164 [worker 2] episodes_seen=1880 last_return=-99.2 (+1 eps) [worker 1] episodes_seen=1860 last_return=-29.8 (+1 eps) [worker 0] episodes_seen=1850 last_return=-48.0 (+1 eps) [worker 3] episodes_seen=1880 last_return=-25.0 (+1 eps) [worker 2] episodes_seen=1890 last_return=-87.8 (+1 eps) [worker 1] episodes_seen=1870 last_return=-138.0 (+1 eps) [worker 0] episodes_seen=1860 last_return=-193.6 (+1 eps) [worker 3] episodes_seen=1890 last_return=-117.9 (+1 eps) [worker 2] episodes_seen=1900 last_return=-285.6 (+1 eps) [worker 1] episodes_seen=1880 last_return=73.6 (+1 eps) [worker 0] episodes_seen=1870 last_return=-75.5 (+1 eps) [worker 3] episodes_seen=1900 last_return=-96.0 (+1 eps) [worker 2] episodes_seen=1910 last_return=-61.8 (+1 eps) [worker 1] episodes_seen=1890 last_return=-71.2 (+1 eps) [worker 0] episodes_seen=1880 last_return=-132.5 (+1 eps) [worker 3] episodes_seen=1910 last_return=-89.3 (+1 eps) [worker 1] episodes_seen=1900 last_return=-52.7 (+1 eps) [worker 2] episodes_seen=1920 last_return=-92.2 (+1 eps) [worker 0] episodes_seen=1890 last_return=-103.9 (+1 eps) [worker 3] episodes_seen=1920 last_return=-246.1 (+1 eps) [worker 1] episodes_seen=1910 last_return=-154.3 (+1 eps) [worker 2] episodes_seen=1930 last_return=-68.5 (+1 eps) [worker 0] episodes_seen=1900 last_return=-50.8 (+1 eps) [worker 3] 
episodes_seen=1930 last_return=-22.6 (+1 eps) [worker 1] episodes_seen=1920 last_return=-226.3 (+1 eps) [worker 2] episodes_seen=1940 last_return=-121.9 (+1 eps) [worker 0] episodes_seen=1910 last_return=-74.3 (+1 eps) [worker 3] episodes_seen=1940 last_return=-79.1 (+1 eps) [worker 1] episodes_seen=1930 last_return=-76.1 (+1 eps) [worker 2] episodes_seen=1950 last_return=-117.2 (+1 eps) [worker 0] episodes_seen=1920 last_return=-97.0 (+1 eps) [worker 3] episodes_seen=1950 last_return=-80.1 (+1 eps) [worker 1] episodes_seen=1940 last_return=-70.1 (+1 eps) [worker 2] episodes_seen=1960 last_return=-61.4 (+1 eps) [worker 0] episodes_seen=1930 last_return=-103.1 (+1 eps) [worker 3] episodes_seen=1960 last_return=-44.3 (+1 eps) [worker 1] episodes_seen=1950 last_return=-135.6 (+1 eps) [worker 2] episodes_seen=1970 last_return=-35.9 (+1 eps) [worker 0] episodes_seen=1940 last_return=-57.8 (+1 eps) [worker 3] episodes_seen=1970 last_return=-78.6 (+1 eps) [worker 1] episodes_seen=1960 last_return=-230.5 (+1 eps) [worker 2] episodes_seen=1980 last_return=-11.9 (+1 eps) [worker 0] episodes_seen=1950 last_return=-79.0 (+1 eps) [worker 3] episodes_seen=1980 last_return=-166.4 (+1 eps) [worker 2] episodes_seen=1990 last_return=-167.3 (+1 eps) [worker 1] episodes_seen=1970 last_return=-40.7 (+1 eps) [worker 0] episodes_seen=1960 last_return=-174.2 (+1 eps) [worker 3] episodes_seen=1990 last_return=-98.4 (+1 eps) [worker 2] episodes_seen=2000 last_return=-174.5 (+1 eps) [A2C][sync] it= 7090 steps= 850800 (+120) avg10= -76.87 loss=109.892 pg=-0.023 vf=199.864 H=1.064 gn=258.693 [worker 1] episodes_seen=1980 last_return=20.6 (+1 eps) [worker 0] episodes_seen=1970 last_return=-39.2 (+1 eps) [worker 3] episodes_seen=2000 last_return=-127.3 (+1 eps) [worker 2] episodes_seen=2010 last_return=-69.8 (+1 eps) [worker 1] episodes_seen=1990 last_return=-160.9 (+1 eps) [worker 0] episodes_seen=1980 last_return=-99.9 (+1 eps) [worker 3] episodes_seen=2010 last_return=-123.0 (+1 eps) [worker 
2] episodes_seen=2020 last_return=-46.2 (+1 eps) [worker 1] episodes_seen=2000 last_return=-29.0 (+1 eps) [worker 0] episodes_seen=1990 last_return=-72.8 (+1 eps) [worker 3] episodes_seen=2020 last_return=-86.9 (+1 eps) [worker 2] episodes_seen=2030 last_return=-215.1 (+1 eps) [worker 1] episodes_seen=2010 last_return=-129.9 (+1 eps) [worker 0] episodes_seen=2000 last_return=-413.7 (+1 eps) [worker 3] episodes_seen=2030 last_return=-232.0 (+1 eps) [worker 2] episodes_seen=2040 last_return=-239.0 (+1 eps) [worker 1] episodes_seen=2020 last_return=-291.7 (+1 eps) [worker 0] episodes_seen=2010 last_return=-316.5 (+1 eps) [worker 3] episodes_seen=2040 last_return=-109.8 (+1 eps) [worker 2] episodes_seen=2050 last_return=-107.2 (+1 eps) [worker 1] episodes_seen=2030 last_return=-154.8 (+1 eps) [worker 0] episodes_seen=2020 last_return=-204.5 (+1 eps) [worker 3] episodes_seen=2050 last_return=-123.0 (+1 eps) [worker 2] episodes_seen=2060 last_return=-398.7 (+1 eps) [worker 1] episodes_seen=2040 last_return=-120.3 (+1 eps) [worker 0] episodes_seen=2030 last_return=-142.2 (+1 eps) [worker 3] episodes_seen=2060 last_return=-389.7 (+1 eps) [worker 2] episodes_seen=2070 last_return=-210.9 (+1 eps) [worker 1] episodes_seen=2050 last_return=-199.5 (+1 eps) [worker 0] episodes_seen=2040 last_return=-302.6 (+1 eps) [worker 3] episodes_seen=2070 last_return=-343.8 (+1 eps) [worker 2] episodes_seen=2080 last_return=-434.6 (+1 eps) [worker 1] episodes_seen=2060 last_return=-318.4 (+1 eps) [worker 0] episodes_seen=2050 last_return=-257.1 (+1 eps) [worker 3] episodes_seen=2080 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=2090 last_return=-275.1 (+1 eps) [worker 1] episodes_seen=2070 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2060 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=2090 last_return=-431.0 (+1 eps) [worker 2] episodes_seen=2100 last_return=-112.8 (+1 eps) [worker 1] episodes_seen=2080 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2070 
last_return=-108.1 (+1 eps) [worker 3] episodes_seen=2100 last_return=-177.0 (+1 eps) [worker 1] episodes_seen=2090 last_return=-192.8 (+1 eps) [worker 2] episodes_seen=2110 last_return=-174.8 (+1 eps) [worker 0] episodes_seen=2080 last_return=-217.3 (+1 eps) [worker 3] episodes_seen=2110 last_return=-144.3 (+1 eps) [worker 1] episodes_seen=2100 last_return=-268.6 (+1 eps) [worker 2] episodes_seen=2120 last_return=-195.1 (+1 eps) [worker 0] episodes_seen=2090 last_return=-265.0 (+1 eps) [A2C][sync] it= 7507 steps= 900840 (+120) avg10=-271.73 loss=7178.208 pg=-0.068 vf=13051.419 H=0.384 gn=6228.965 [worker 3] episodes_seen=2120 last_return=-289.0 (+1 eps) [worker 1] episodes_seen=2110 last_return=-351.1 (+1 eps) [worker 2] episodes_seen=2130 last_return=-274.6 (+1 eps) [worker 0] episodes_seen=2100 last_return=-332.0 (+1 eps) [worker 3] episodes_seen=2130 last_return=-178.5 (+1 eps) [worker 1] episodes_seen=2120 last_return=-344.2 (+1 eps) [worker 2] episodes_seen=2140 last_return=-264.0 (+1 eps) [worker 0] episodes_seen=2110 last_return=-365.8 (+1 eps) [worker 3] episodes_seen=2140 last_return=-296.1 (+1 eps) [worker 1] episodes_seen=2130 last_return=-139.5 (+1 eps) [worker 2] episodes_seen=2150 last_return=-281.0 (+1 eps) [worker 0] episodes_seen=2120 last_return=-83.6 (+1 eps) [worker 3] episodes_seen=2150 last_return=-66.7 (+1 eps) [worker 1] episodes_seen=2140 last_return=-64.5 (+1 eps) [worker 2] episodes_seen=2160 last_return=-157.0 (+1 eps) [worker 0] episodes_seen=2130 last_return=31.3 (+1 eps) [worker 3] episodes_seen=2160 last_return=-74.3 (+1 eps) [worker 1] episodes_seen=2150 last_return=-111.8 (+1 eps) [worker 2] episodes_seen=2170 last_return=-93.0 (+1 eps) [worker 0] episodes_seen=2140 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=2170 last_return=-89.1 (+1 eps) [worker 1] episodes_seen=2160 last_return=-136.8 (+1 eps) [worker 2] episodes_seen=2180 last_return=-83.0 (+1 eps) [worker 0] episodes_seen=2150 last_return=-98.9 (+1 eps) [worker 3] 
episodes_seen=2180 last_return=-40.3 (+1 eps) [worker 1] episodes_seen=2170 last_return=-40.5 (+1 eps) [worker 2] episodes_seen=2190 last_return=-101.3 (+1 eps) [worker 0] episodes_seen=2160 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=2190 last_return=-76.5 (+1 eps) [worker 1] episodes_seen=2180 last_return=-45.4 (+1 eps) [worker 2] episodes_seen=2200 last_return=-78.5 (+1 eps) [worker 0] episodes_seen=2170 last_return=-44.1 (+1 eps) [worker 3] episodes_seen=2200 last_return=-83.6 (+1 eps) [worker 1] episodes_seen=2190 last_return=-84.8 (+1 eps) [worker 2] episodes_seen=2210 last_return=-134.2 (+1 eps) [worker 0] episodes_seen=2180 last_return=-114.9 (+1 eps) [worker 3] episodes_seen=2210 last_return=-42.9 (+1 eps) [worker 1] episodes_seen=2200 last_return=-57.6 (+1 eps) [worker 2] episodes_seen=2220 last_return=-75.9 (+1 eps) [worker 0] episodes_seen=2190 last_return=-89.9 (+1 eps) [worker 3] episodes_seen=2220 last_return=-93.0 (+1 eps) [worker 1] episodes_seen=2210 last_return=-113.0 (+1 eps) [worker 2] episodes_seen=2230 last_return=-72.4 (+1 eps) [worker 0] episodes_seen=2200 last_return=-72.6 (+1 eps) [worker 3] episodes_seen=2230 last_return=-51.9 (+1 eps) [worker 1] episodes_seen=2220 last_return=-110.4 (+1 eps) [worker 2] episodes_seen=2240 last_return=-77.6 (+1 eps) [worker 0] episodes_seen=2210 last_return=-109.1 (+1 eps) [worker 3] episodes_seen=2240 last_return=-6.6 (+1 eps) [worker 1] episodes_seen=2230 last_return=-33.4 (+1 eps) [worker 2] episodes_seen=2250 last_return=-79.6 (+1 eps) [worker 0] episodes_seen=2220 last_return=-93.6 (+1 eps) [worker 3] episodes_seen=2250 last_return=-180.1 (+1 eps) [worker 1] episodes_seen=2240 last_return=-42.3 (+1 eps) [worker 2] episodes_seen=2260 last_return=-67.3 (+1 eps) [worker 0] episodes_seen=2230 last_return=-70.8 (+1 eps) [worker 3] episodes_seen=2260 last_return=-82.7 (+1 eps) [worker 1] episodes_seen=2250 last_return=-44.6 (+1 eps) [worker 2] episodes_seen=2270 last_return=-65.9 (+1 eps) [worker 
0] episodes_seen=2240 last_return=-70.8 (+1 eps) [A2C][sync] it= 7924 steps= 950880 (+120) avg10= -81.93 loss=366.435 pg=-0.089 vf=666.423 H=0.956 gn=1703.701 [worker 3] episodes_seen=2270 last_return=-39.8 (+1 eps) [worker 1] episodes_seen=2260 last_return=-54.6 (+1 eps) [worker 0] episodes_seen=2250 last_return=-77.9 (+1 eps) [worker 2] episodes_seen=2280 last_return=-54.7 (+1 eps) [worker 3] episodes_seen=2280 last_return=-42.6 (+1 eps) [worker 1] episodes_seen=2270 last_return=-62.9 (+1 eps) [worker 2] episodes_seen=2290 last_return=-56.5 (+1 eps) [worker 0] episodes_seen=2260 last_return=-78.3 (+1 eps) [worker 3] episodes_seen=2290 last_return=-68.3 (+1 eps) [worker 2] episodes_seen=2300 last_return=-24.7 (+1 eps) [worker 1] episodes_seen=2280 last_return=-90.0 (+1 eps) [worker 0] episodes_seen=2270 last_return=-99.5 (+1 eps) [worker 3] episodes_seen=2300 last_return=-78.2 (+1 eps) [worker 1] episodes_seen=2290 last_return=-3.3 (+1 eps) [worker 2] episodes_seen=2310 last_return=-82.2 (+1 eps) [worker 0] episodes_seen=2280 last_return=-85.8 (+1 eps) [worker 1] episodes_seen=2300 last_return=-11.3 (+1 eps) [worker 2] episodes_seen=2320 last_return=-66.3 (+1 eps) [worker 0] episodes_seen=2290 last_return=-193.4 (+1 eps) [worker 3] episodes_seen=2310 last_return=20.4 (+1 eps) [worker 1] episodes_seen=2310 last_return=-79.2 (+1 eps) [worker 2] episodes_seen=2330 last_return=-99.8 (+1 eps) [worker 0] episodes_seen=2300 last_return=-271.5 (+1 eps) [worker 3] episodes_seen=2320 last_return=-25.8 (+1 eps) [worker 2] episodes_seen=2340 last_return=-53.1 (+1 eps) [worker 1] episodes_seen=2320 last_return=-84.5 (+1 eps) [worker 0] episodes_seen=2310 last_return=-89.8 (+1 eps) [worker 3] episodes_seen=2330 last_return=-76.2 (+1 eps) [worker 2] episodes_seen=2350 last_return=-74.8 (+1 eps) [worker 0] episodes_seen=2320 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2330 last_return=-65.6 (+1 eps) [worker 3] episodes_seen=2340 last_return=-53.0 (+1 eps) [worker 2] 
episodes_seen=2360 last_return=-152.6 (+1 eps) [worker 1] episodes_seen=2340 last_return=-70.7 (+1 eps) [worker 0] episodes_seen=2330 last_return=6.1 (+1 eps) [worker 3] episodes_seen=2350 last_return=-59.5 (+1 eps) [worker 2] episodes_seen=2370 last_return=-49.3 (+1 eps) [worker 1] episodes_seen=2350 last_return=-80.4 (+1 eps) [worker 3] episodes_seen=2360 last_return=-72.3 (+1 eps) [worker 2] episodes_seen=2380 last_return=-47.2 (+1 eps) [worker 0] episodes_seen=2340 last_return=48.4 (+1 eps) [worker 1] episodes_seen=2360 last_return=-124.1 (+1 eps) [worker 3] episodes_seen=2370 last_return=-52.4 (+1 eps) [worker 2] episodes_seen=2390 last_return=-79.4 (+1 eps) [A2C][sync] it= 8341 steps= 1000920 (+120) avg10= -70.66 loss=44.540 pg=0.008 vf=80.984 H=0.990 gn=254.912 [worker 0] episodes_seen=2350 last_return=142.8 (+1 eps) [worker 1] episodes_seen=2370 last_return=-91.6 (+1 eps) [worker 3] episodes_seen=2380 last_return=-159.8 (+1 eps) [worker 2] episodes_seen=2400 last_return=-144.4 (+1 eps) [worker 0] episodes_seen=2360 last_return=-65.6 (+1 eps) [worker 1] episodes_seen=2380 last_return=-38.5 (+1 eps) [worker 2] episodes_seen=2410 last_return=-127.6 (+1 eps) [worker 3] episodes_seen=2390 last_return=-71.8 (+1 eps) [worker 0] episodes_seen=2370 last_return=-166.9 (+1 eps) [worker 1] episodes_seen=2390 last_return=-203.6 (+1 eps) [worker 2] episodes_seen=2420 last_return=-97.0 (+1 eps) [worker 3] episodes_seen=2400 last_return=-56.1 (+1 eps) [worker 0] episodes_seen=2380 last_return=-46.1 (+1 eps) [worker 1] episodes_seen=2400 last_return=-16.4 (+1 eps) [worker 2] episodes_seen=2430 last_return=-82.5 (+1 eps) [worker 3] episodes_seen=2410 last_return=-66.2 (+1 eps) [worker 0] episodes_seen=2390 last_return=-54.5 (+1 eps) [worker 1] episodes_seen=2410 last_return=-79.7 (+1 eps) [worker 2] episodes_seen=2440 last_return=-62.1 (+1 eps) [worker 3] episodes_seen=2420 last_return=-76.2 (+1 eps) [worker 0] episodes_seen=2400 last_return=-72.7 (+1 eps) [worker 2] 
episodes_seen=2450 last_return=-195.7 (+1 eps) [worker 1] episodes_seen=2420 last_return=-66.4 (+1 eps) [worker 3] episodes_seen=2430 last_return=-129.0 (+1 eps) [worker 0] episodes_seen=2410 last_return=-74.8 (+1 eps) [worker 2] episodes_seen=2460 last_return=-19.7 (+1 eps) [worker 1] episodes_seen=2430 last_return=-32.0 (+1 eps) [worker 3] episodes_seen=2440 last_return=-80.9 (+1 eps) [worker 2] episodes_seen=2470 last_return=-91.8 (+1 eps) [worker 1] episodes_seen=2440 last_return=-71.0 (+1 eps) [worker 3] episodes_seen=2450 last_return=-58.2 (+1 eps) [worker 0] episodes_seen=2420 last_return=-221.5 (+1 eps) [worker 2] episodes_seen=2480 last_return=-68.0 (+1 eps) [worker 3] episodes_seen=2460 last_return=-130.0 (+1 eps) [worker 1] episodes_seen=2450 last_return=-79.9 (+1 eps) [worker 2] episodes_seen=2490 last_return=-95.6 (+1 eps) [worker 3] episodes_seen=2470 last_return=-73.3 (+1 eps) [worker 1] episodes_seen=2460 last_return=-78.2 (+1 eps) [worker 0] episodes_seen=2430 last_return=-61.9 (+1 eps) [A2C][sync] it= 8758 steps= 1050960 (+120) avg10=-103.88 loss=414.642 pg=0.025 vf=753.867 H=0.950 gn=2248.128 [worker 2] episodes_seen=2500 last_return=-90.2 (+1 eps) [worker 3] episodes_seen=2480 last_return=-76.2 (+1 eps) [worker 1] episodes_seen=2470 last_return=2.9 (+1 eps) [worker 2] episodes_seen=2510 last_return=-34.9 (+1 eps) [worker 3] episodes_seen=2490 last_return=-1.0 (+1 eps) [worker 0] episodes_seen=2440 last_return=-44.5 (+1 eps) [worker 1] episodes_seen=2480 last_return=-173.0 (+1 eps) [worker 2] episodes_seen=2520 last_return=-69.1 (+1 eps) [worker 0] episodes_seen=2450 last_return=-92.8 (+1 eps) [worker 3] episodes_seen=2500 last_return=-81.1 (+1 eps) [worker 1] episodes_seen=2490 last_return=-156.4 (+1 eps) [worker 0] episodes_seen=2460 last_return=-81.4 (+1 eps) [worker 1] episodes_seen=2500 last_return=32.4 (+1 eps) [worker 2] episodes_seen=2530 last_return=-119.0 (+1 eps) [worker 0] episodes_seen=2470 last_return=-28.5 (+1 eps) [worker 3] 
episodes_seen=2510 last_return=14.2 (+1 eps) [worker 1] episodes_seen=2510 last_return=-64.4 (+1 eps) [worker 2] episodes_seen=2540 last_return=-67.2 (+1 eps) [worker 0] episodes_seen=2480 last_return=-49.6 (+1 eps) [worker 3] episodes_seen=2520 last_return=-44.5 (+1 eps) [worker 2] episodes_seen=2550 last_return=-88.0 (+1 eps) [worker 0] episodes_seen=2490 last_return=-188.1 (+1 eps) [worker 3] episodes_seen=2530 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2520 last_return=-223.7 (+1 eps) [worker 2] episodes_seen=2560 last_return=-91.6 (+1 eps) [worker 0] episodes_seen=2500 last_return=-24.9 (+1 eps) [worker 3] episodes_seen=2540 last_return=-100.9 (+1 eps) [worker 2] episodes_seen=2570 last_return=-89.7 (+1 eps) [worker 0] episodes_seen=2510 last_return=-75.0 (+1 eps) [worker 1] episodes_seen=2530 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=2550 last_return=-73.7 (+1 eps) [worker 2] episodes_seen=2580 last_return=-85.3 (+1 eps) [worker 0] episodes_seen=2520 last_return=-152.4 (+1 eps) [worker 1] episodes_seen=2540 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=2590 last_return=-53.8 (+1 eps) [worker 3] episodes_seen=2560 last_return=-22.7 (+1 eps) [worker 0] episodes_seen=2530 last_return=-142.9 (+1 eps) [A2C][sync] it= 9175 steps= 1101000 (+120) avg10= -69.37 loss=224.482 pg=0.036 vf=408.101 H=0.964 gn=848.849 [worker 1] episodes_seen=2550 last_return=-101.0 (+1 eps) [worker 2] episodes_seen=2600 last_return=-87.1 (+1 eps) [worker 0] episodes_seen=2540 last_return=-29.0 (+1 eps) [worker 3] episodes_seen=2570 last_return=-72.7 (+1 eps) [worker 0] episodes_seen=2550 last_return=-82.3 (+1 eps) [worker 2] episodes_seen=2610 last_return=-69.5 (+1 eps) [worker 1] episodes_seen=2560 last_return=-122.1 (+1 eps) [worker 3] episodes_seen=2580 last_return=-75.7 (+1 eps) [worker 2] episodes_seen=2620 last_return=-94.8 (+1 eps) [worker 0] episodes_seen=2560 last_return=-51.1 (+1 eps) [worker 3] episodes_seen=2590 last_return=-101.4 (+1 eps) [worker 1] 
episodes_seen=2570 last_return=-77.1 (+1 eps) [worker 3] episodes_seen=2600 last_return=-29.2 (+1 eps) [worker 1] episodes_seen=2580 last_return=-98.5 (+1 eps) [worker 0] episodes_seen=2570 last_return=-109.0 (+1 eps) [worker 2] episodes_seen=2630 last_return=-91.1 (+1 eps) [worker 2] episodes_seen=2640 last_return=-59.0 (+1 eps) [worker 1] episodes_seen=2590 last_return=-46.8 (+1 eps) [worker 3] episodes_seen=2610 last_return=-66.6 (+1 eps) [worker 0] episodes_seen=2580 last_return=-115.6 (+1 eps) [worker 3] episodes_seen=2620 last_return=-172.7 (+1 eps) [worker 0] episodes_seen=2590 last_return=-34.3 (+1 eps) [worker 2] episodes_seen=2650 last_return=-78.0 (+1 eps) [worker 1] episodes_seen=2600 last_return=-1.7 (+1 eps) [worker 3] episodes_seen=2630 last_return=-41.3 (+1 eps) [worker 2] episodes_seen=2660 last_return=-71.9 (+1 eps) [worker 1] episodes_seen=2610 last_return=-61.1 (+1 eps) [worker 0] episodes_seen=2600 last_return=-78.6 (+1 eps) [worker 3] episodes_seen=2640 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=2670 last_return=-439.1 (+1 eps) [worker 1] episodes_seen=2620 last_return=-376.2 (+1 eps) [worker 0] episodes_seen=2610 last_return=-140.3 (+1 eps) [worker 3] episodes_seen=2650 last_return=-282.5 (+1 eps) [worker 2] episodes_seen=2680 last_return=-313.9 (+1 eps) [worker 1] episodes_seen=2630 last_return=-327.5 (+1 eps) [A2C][sync] it= 9592 steps= 1151040 (+120) avg10=-263.98 loss=25448.363 pg=-0.008 vf=46269.766 H=0.059 gn=71156.352 [worker 0] episodes_seen=2620 last_return=-241.2 (+1 eps) [worker 3] episodes_seen=2660 last_return=-390.5 (+1 eps) [worker 2] episodes_seen=2690 last_return=-422.0 (+1 eps) [worker 1] episodes_seen=2640 last_return=-413.2 (+1 eps) [worker 0] episodes_seen=2630 last_return=-129.1 (+1 eps) [worker 3] episodes_seen=2670 last_return=-228.3 (+1 eps) [worker 2] episodes_seen=2700 last_return=-143.2 (+1 eps) [worker 1] episodes_seen=2650 last_return=-160.3 (+1 eps) [worker 3] episodes_seen=2680 last_return=-71.5 (+1 
eps) [worker 0] episodes_seen=2640 last_return=-99.2 (+1 eps) [worker 2] episodes_seen=2710 last_return=-39.3 (+1 eps) [worker 0] episodes_seen=2650 last_return=-101.3 (+1 eps) [worker 2] episodes_seen=2720 last_return=-101.0 (+1 eps) [worker 1] episodes_seen=2660 last_return=-63.7 (+1 eps) [worker 3] episodes_seen=2690 last_return=-3.8 (+1 eps) [worker 0] episodes_seen=2660 last_return=-72.2 (+1 eps) [worker 2] episodes_seen=2730 last_return=-63.4 (+1 eps) [worker 1] episodes_seen=2670 last_return=-72.7 (+1 eps) [worker 0] episodes_seen=2670 last_return=-133.9 (+1 eps) [worker 3] episodes_seen=2700 last_return=-76.7 (+1 eps) [worker 2] episodes_seen=2740 last_return=-51.7 (+1 eps) [worker 1] episodes_seen=2680 last_return=-113.4 (+1 eps) [worker 0] episodes_seen=2680 last_return=-70.7 (+1 eps) [worker 3] episodes_seen=2710 last_return=-51.1 (+1 eps) [worker 1] episodes_seen=2690 last_return=-23.4 (+1 eps) [worker 2] episodes_seen=2750 last_return=-60.1 (+1 eps) [worker 0] episodes_seen=2690 last_return=-83.4 (+1 eps) [worker 1] episodes_seen=2700 last_return=-77.8 (+1 eps) [worker 2] episodes_seen=2760 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2700 last_return=-64.3 (+1 eps) [worker 1] episodes_seen=2710 last_return=-40.9 (+1 eps) [worker 2] episodes_seen=2770 last_return=-76.7 (+1 eps) [worker 3] episodes_seen=2720 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2720 last_return=-100.1 (+1 eps) [worker 2] episodes_seen=2780 last_return=-58.7 (+1 eps) [worker 3] episodes_seen=2730 last_return=-130.8 (+1 eps) [worker 0] episodes_seen=2710 last_return=-92.2 (+1 eps) [A2C][sync] it=10000 steps= 1200000 (+120) avg10= -79.78 loss=286.818 pg=-0.106 vf=521.698 H=0.914 gn=1663.722 [Checkpoint] Saved self-describing checkpoint → part2_artifacts/checkpoints/a2c_run8_seed1227.pth [A2C][sync] done: steps=1200000 time=1080.6s avg10=-79.78
[Run run8_seed1227] checkpoint: part2_artifacts/checkpoints/a2c_run8_seed1227.pth [Run run8_seed1227] training plot (tail 500): part2_artifacts/train_curve_run8_seed1227.png [Run run8_seed1227] training plot (full): part2_artifacts/train_curve_full_run8_seed1227.png [Run run8_seed1227] per-worker plot: part2_artifacts/train_curve_workers_run8_seed1227.png [Run run8_seed1227] workers-average plot: part2_artifacts/train_curve_workers_avg_run8_seed1227.png
[Eval run8_seed1227] mean=-100.79 std=23.60 min=-132.57 max=-46.92 [Eval run8_seed1227] CSV: part2_artifacts/eval10_run8_seed1227.csv [Eval run8_seed1227] plot: part2_artifacts/eval10_run8_seed1227.png [Best] ep=5 return=-46.92 seed=1232
/usr/local/lib/python3.12/dist-packages/gymnasium/wrappers/rendering.py:293: UserWarning: WARN: Overwriting existing videos at /content/part2_artifacts/videos/run8_seed1227 folder (try specifying a different `video_folder` for the `RecordVideo` wrapper if this is not desired)
logger.warn(
[Video run8_seed1227] episode return=-46.92 [Video run8_seed1227] saved under: part2_artifacts/videos run8_seed1227 | mean=-100.8±23.6 | best_ep=5, best_ret=-46.9
Run#9
# Run 9 driver cell: train -> evaluate -> record video, all keyed by one run id.
run_id = f"run9_seed{SEED}"

# Hyperparameters for this run, gathered in one place so they are easy to
# compare against other runs.
# NOTE(review): T looks like the per-worker rollout length (4 workers x 30 =
# the +120 steps per sync iteration shown in the training log) — confirm
# against train_once.
run9_hparams = dict(
    n_workers=4,
    total_env_steps=1_200_000,
    T=30,
    gamma=0.99,
    entropy_coef=0.02,
    value_coef=0.55,
    max_grad_norm=0.5,
    lr=3e-4,
    log_every=50_000,
)

# Multi-worker training; also writes the checkpoint and the training plots.
model, logs, paths = train_once(run_id=run_id, **run9_hparams)

# Greedy evaluation over 10 fixed-seed episodes, loaded from the saved checkpoint.
metrics, eval_paths = evaluate_10(run_id, paths.ckpt_path)

# Re-run the best of those 10 evaluation episodes (by its seed) and record it.
video_dir, best_idx, best_ret = record_best_from_eval(run_id, paths.ckpt_path)

# One-line summary of the run: eval mean ± std and the best episode.
run9_summary = f"{run_id} | mean={metrics['mean']:.1f}±{metrics['std']:.1f} | best_ep={best_idx}, best_ret={best_ret:.1f}"
print(run9_summary)
[Run run9_seed1227] starting training… [A2C][sync] start: workers=4, T=30, target_steps=1200000, mp=fork [A2C][sync] it= 1 steps= 120 (+120) avg10= nan loss=82.878 pg=-0.000 vf=150.738 H=1.386 gn=23.857 [worker 0] episodes_seen=10 last_return=-86.5 (+1 eps) [worker 1] episodes_seen=10 last_return=-172.5 (+1 eps) [worker 2] episodes_seen=10 last_return=-216.8 (+1 eps) [worker 3] episodes_seen=10 last_return=-70.9 (+1 eps) [worker 0] episodes_seen=20 last_return=-283.0 (+1 eps) [worker 2] episodes_seen=20 last_return=-285.2 (+1 eps) [worker 1] episodes_seen=20 last_return=-245.6 (+1 eps) [worker 3] episodes_seen=20 last_return=-277.3 (+1 eps) [worker 0] episodes_seen=30 last_return=-120.9 (+1 eps) [worker 2] episodes_seen=30 last_return=-88.9 (+1 eps) [worker 1] episodes_seen=30 last_return=-87.3 (+1 eps) [worker 3] episodes_seen=30 last_return=-146.5 (+1 eps) [worker 2] episodes_seen=40 last_return=-121.3 (+1 eps) [worker 0] episodes_seen=40 last_return=-80.8 (+1 eps) [worker 3] episodes_seen=40 last_return=-100.5 (+1 eps) [worker 1] episodes_seen=40 last_return=17.4 (+1 eps) [worker 2] episodes_seen=50 last_return=-108.5 (+1 eps) [worker 3] episodes_seen=50 last_return=-154.9 (+1 eps) [worker 0] episodes_seen=50 last_return=-160.2 (+1 eps) [worker 1] episodes_seen=50 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=60 last_return=-189.3 (+1 eps) [worker 3] episodes_seen=60 last_return=-115.9 (+1 eps) [worker 0] episodes_seen=60 last_return=-131.9 (+1 eps) [worker 1] episodes_seen=60 last_return=-244.5 (+1 eps) [worker 0] episodes_seen=70 last_return=-193.2 (+1 eps) [worker 3] episodes_seen=70 last_return=-133.8 (+1 eps) [worker 2] episodes_seen=70 last_return=-176.0 (+1 eps) [worker 1] episodes_seen=70 last_return=-181.8 (+1 eps) [worker 0] episodes_seen=80 last_return=-192.7 (+1 eps) [worker 2] episodes_seen=80 last_return=-186.2 (+1 eps) [worker 3] episodes_seen=80 last_return=-199.6 (+1 eps) [worker 1] episodes_seen=80 last_return=-227.1 (+1 eps) [worker 0] 
episodes_seen=90 last_return=-117.1 (+1 eps) [worker 2] episodes_seen=90 last_return=-244.8 (+1 eps) [worker 3] episodes_seen=90 last_return=-286.6 (+1 eps) [worker 1] episodes_seen=90 last_return=-286.2 (+1 eps) [worker 0] episodes_seen=100 last_return=-288.4 (+1 eps) [worker 1] episodes_seen=100 last_return=-280.8 (+1 eps) [worker 2] episodes_seen=100 last_return=-293.6 (+1 eps) [worker 3] episodes_seen=100 last_return=-187.9 (+1 eps) [worker 0] episodes_seen=110 last_return=-195.0 (+1 eps) [worker 1] episodes_seen=110 last_return=-134.8 (+1 eps) [worker 2] episodes_seen=110 last_return=-107.3 (+1 eps) [worker 3] episodes_seen=110 last_return=-272.0 (+1 eps) [A2C][sync] it= 418 steps= 50160 (+120) avg10=-190.77 loss=3828.297 pg=0.080 vf=6960.405 H=0.244 gn=10114.216 [worker 0] episodes_seen=120 last_return=-265.0 (+1 eps) [worker 1] episodes_seen=120 last_return=-235.8 (+1 eps) [worker 2] episodes_seen=120 last_return=-289.9 (+1 eps) [worker 3] episodes_seen=120 last_return=-199.2 (+1 eps) [worker 0] episodes_seen=130 last_return=-108.8 (+1 eps) [worker 1] episodes_seen=130 last_return=-173.2 (+1 eps) [worker 2] episodes_seen=130 last_return=-106.1 (+1 eps) [worker 3] episodes_seen=130 last_return=-101.5 (+1 eps) [worker 0] episodes_seen=140 last_return=-120.3 (+1 eps) [worker 2] episodes_seen=140 last_return=-57.0 (+1 eps) [worker 1] episodes_seen=140 last_return=-166.0 (+1 eps) [worker 3] episodes_seen=140 last_return=-112.3 (+1 eps) [worker 0] episodes_seen=150 last_return=-37.8 (+1 eps) [worker 1] episodes_seen=150 last_return=-110.0 (+1 eps) [worker 2] episodes_seen=150 last_return=-136.3 (+1 eps) [worker 3] episodes_seen=150 last_return=-129.2 (+1 eps) [worker 0] episodes_seen=160 last_return=-175.5 (+1 eps) [worker 1] episodes_seen=160 last_return=-250.6 (+1 eps) [worker 2] episodes_seen=160 last_return=-89.1 (+1 eps) [worker 3] episodes_seen=160 last_return=-104.7 (+1 eps) [worker 0] episodes_seen=170 last_return=-66.8 (+1 eps) [worker 1] 
episodes_seen=170 last_return=-102.6 (+1 eps) [worker 2] episodes_seen=170 last_return=-115.4 (+1 eps) [worker 3] episodes_seen=170 last_return=-132.1 (+1 eps) [worker 0] episodes_seen=180 last_return=-234.7 (+1 eps) [worker 1] episodes_seen=180 last_return=-81.0 (+1 eps) [worker 2] episodes_seen=180 last_return=-90.5 (+1 eps) [worker 3] episodes_seen=180 last_return=-79.1 (+1 eps) [worker 0] episodes_seen=190 last_return=-96.8 (+1 eps) [worker 1] episodes_seen=190 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=190 last_return=-93.1 (+1 eps) [worker 3] episodes_seen=190 last_return=-112.8 (+1 eps) [worker 0] episodes_seen=200 last_return=-146.4 (+1 eps) [worker 1] episodes_seen=200 last_return=-148.9 (+1 eps) [worker 2] episodes_seen=200 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=200 last_return=-276.0 (+1 eps) [worker 0] episodes_seen=210 last_return=-127.1 (+1 eps) [worker 1] episodes_seen=210 last_return=-82.2 (+1 eps) [worker 2] episodes_seen=210 last_return=-112.9 (+1 eps) [worker 3] episodes_seen=210 last_return=-93.0 (+1 eps) [worker 0] episodes_seen=220 last_return=-75.4 (+1 eps) [worker 1] episodes_seen=220 last_return=-119.0 (+1 eps) [worker 2] episodes_seen=220 last_return=-182.8 (+1 eps) [worker 3] episodes_seen=220 last_return=-92.3 (+1 eps) [worker 0] episodes_seen=230 last_return=-82.7 (+1 eps) [worker 1] episodes_seen=230 last_return=-301.8 (+1 eps) [worker 2] episodes_seen=230 last_return=-121.6 (+1 eps) [worker 3] episodes_seen=230 last_return=-203.3 (+1 eps) [worker 0] episodes_seen=240 last_return=-149.2 (+1 eps) [worker 1] episodes_seen=240 last_return=-253.1 (+1 eps) [worker 2] episodes_seen=240 last_return=-99.4 (+1 eps) [worker 3] episodes_seen=240 last_return=-177.5 (+1 eps) [worker 0] episodes_seen=250 last_return=-275.3 (+1 eps) [A2C][sync] it= 835 steps= 100200 (+120) avg10=-275.88 loss=11661.759 pg=0.259 vf=21202.750 H=0.633 gn=20231.148 [worker 1] episodes_seen=250 last_return=-164.6 (+1 eps) [worker 2] 
episodes_seen=250 last_return=-234.9 (+1 eps) [worker 3] episodes_seen=250 last_return=62.2 (+1 eps) [worker 0] episodes_seen=260 last_return=-79.6 (+1 eps) [worker 1] episodes_seen=260 last_return=-154.3 (+1 eps) [worker 2] episodes_seen=260 last_return=-247.6 (+1 eps) [worker 3] episodes_seen=260 last_return=-155.9 (+1 eps) [worker 0] episodes_seen=270 last_return=-178.7 (+1 eps) [worker 1] episodes_seen=270 last_return=-342.1 (+1 eps) [worker 2] episodes_seen=270 last_return=-291.5 (+1 eps) [worker 3] episodes_seen=270 last_return=-379.5 (+1 eps) [worker 0] episodes_seen=280 last_return=-241.3 (+1 eps) [worker 1] episodes_seen=280 last_return=-118.3 (+1 eps) [worker 2] episodes_seen=280 last_return=-191.7 (+1 eps) [worker 3] episodes_seen=280 last_return=-296.9 (+1 eps) [worker 0] episodes_seen=290 last_return=-108.4 (+1 eps) [worker 1] episodes_seen=290 last_return=-344.1 (+1 eps) [worker 2] episodes_seen=290 last_return=-409.4 (+1 eps) [worker 0] episodes_seen=300 last_return=-392.0 (+1 eps) [worker 3] episodes_seen=290 last_return=-281.9 (+1 eps) [worker 1] episodes_seen=300 last_return=-98.8 (+1 eps) [worker 2] episodes_seen=300 last_return=-121.3 (+1 eps) [worker 3] episodes_seen=300 last_return=-275.1 (+1 eps) [worker 0] episodes_seen=310 last_return=-385.6 (+1 eps) [worker 1] episodes_seen=310 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=310 last_return=-270.3 (+1 eps) [worker 3] episodes_seen=310 last_return=-355.1 (+1 eps) [worker 0] episodes_seen=320 last_return=-149.8 (+1 eps) [worker 1] episodes_seen=320 last_return=-391.7 (+1 eps) [worker 2] episodes_seen=320 last_return=-176.8 (+1 eps) [worker 0] episodes_seen=330 last_return=-338.3 (+1 eps) [worker 3] episodes_seen=320 last_return=-175.9 (+1 eps) [worker 1] episodes_seen=330 last_return=-272.1 (+1 eps) [worker 2] episodes_seen=330 last_return=-263.0 (+1 eps) [worker 0] episodes_seen=340 last_return=-164.0 (+1 eps) [worker 3] episodes_seen=330 last_return=-354.3 (+1 eps) [worker 1] 
episodes_seen=340 last_return=-328.6 (+1 eps) [worker 2] episodes_seen=340 last_return=-209.4 (+1 eps) [worker 3] episodes_seen=340 last_return=-145.9 (+1 eps) [worker 0] episodes_seen=350 last_return=-118.3 (+1 eps) [worker 1] episodes_seen=350 last_return=-301.4 (+1 eps) [worker 2] episodes_seen=350 last_return=-173.7 (+1 eps) [worker 0] episodes_seen=360 last_return=-189.9 (+1 eps) [worker 3] episodes_seen=350 last_return=-388.5 (+1 eps) [worker 1] episodes_seen=360 last_return=-276.8 (+1 eps) [worker 2] episodes_seen=360 last_return=-225.6 (+1 eps) [worker 0] episodes_seen=370 last_return=-141.6 (+1 eps) [worker 3] episodes_seen=360 last_return=-221.6 (+1 eps) [worker 1] episodes_seen=370 last_return=-356.6 (+1 eps) [worker 2] episodes_seen=370 last_return=-198.4 (+1 eps) [worker 0] episodes_seen=380 last_return=0.7 (+1 eps) [worker 3] episodes_seen=370 last_return=-330.7 (+1 eps) [worker 1] episodes_seen=380 last_return=-261.1 (+1 eps) [worker 0] episodes_seen=390 last_return=-185.9 (+1 eps) [worker 2] episodes_seen=380 last_return=-223.2 (+1 eps) [worker 3] episodes_seen=380 last_return=-263.6 (+1 eps) [worker 1] episodes_seen=390 last_return=-268.0 (+1 eps) [A2C][sync] it= 1252 steps= 150240 (+120) avg10=-219.45 loss=12248.002 pg=0.004 vf=22269.111 H=0.700 gn=27819.611 [worker 0] episodes_seen=400 last_return=-286.0 (+1 eps) [worker 2] episodes_seen=390 last_return=-316.5 (+1 eps) [worker 3] episodes_seen=390 last_return=-248.9 (+1 eps) [worker 1] episodes_seen=400 last_return=-352.5 (+1 eps) [worker 2] episodes_seen=400 last_return=-225.9 (+1 eps) [worker 0] episodes_seen=410 last_return=20.6 (+1 eps) [worker 3] episodes_seen=400 last_return=-426.4 (+1 eps) [worker 1] episodes_seen=410 last_return=-283.3 (+1 eps) [worker 2] episodes_seen=410 last_return=-365.9 (+1 eps) [worker 0] episodes_seen=420 last_return=-361.9 (+1 eps) [worker 3] episodes_seen=410 last_return=-238.1 (+1 eps) [worker 1] episodes_seen=420 last_return=-222.8 (+1 eps) [worker 2] 
episodes_seen=420 last_return=-281.4 (+1 eps) [worker 0] episodes_seen=430 last_return=-246.5 (+1 eps) [worker 3] episodes_seen=420 last_return=-223.1 (+1 eps) [worker 1] episodes_seen=430 last_return=-206.0 (+1 eps) [worker 2] episodes_seen=430 last_return=-233.6 (+1 eps) [worker 0] episodes_seen=440 last_return=-153.0 (+1 eps) [worker 3] episodes_seen=430 last_return=-211.6 (+1 eps) [worker 1] episodes_seen=440 last_return=-284.4 (+1 eps) [worker 2] episodes_seen=440 last_return=-332.4 (+1 eps) [worker 3] episodes_seen=440 last_return=-133.9 (+1 eps) [worker 0] episodes_seen=450 last_return=-416.4 (+1 eps) [worker 1] episodes_seen=450 last_return=-366.6 (+1 eps) [worker 2] episodes_seen=450 last_return=-328.5 (+1 eps) [worker 3] episodes_seen=450 last_return=-162.8 (+1 eps) [worker 0] episodes_seen=460 last_return=-275.5 (+1 eps) [worker 1] episodes_seen=460 last_return=-436.6 (+1 eps) [worker 2] episodes_seen=460 last_return=-431.8 (+1 eps) [worker 3] episodes_seen=460 last_return=-243.6 (+1 eps) [worker 0] episodes_seen=470 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=470 last_return=-319.8 (+1 eps) [worker 2] episodes_seen=470 last_return=-316.0 (+1 eps) [worker 3] episodes_seen=470 last_return=-127.7 (+1 eps) [worker 0] episodes_seen=480 last_return=-272.2 (+1 eps) [worker 1] episodes_seen=480 last_return=-170.2 (+1 eps) [worker 2] episodes_seen=480 last_return=-509.9 (+1 eps) [worker 3] episodes_seen=480 last_return=-366.9 (+1 eps) [worker 0] episodes_seen=490 last_return=-407.7 (+1 eps) [worker 1] episodes_seen=490 last_return=-112.6 (+1 eps) [worker 2] episodes_seen=490 last_return=-357.6 (+1 eps) [worker 3] episodes_seen=490 last_return=-196.5 (+1 eps) [worker 1] episodes_seen=500 last_return=-195.4 (+1 eps) [worker 0] episodes_seen=500 last_return=-145.8 (+1 eps) [worker 2] episodes_seen=500 last_return=-186.0 (+1 eps) [worker 3] episodes_seen=500 last_return=-140.8 (+1 eps) [worker 1] episodes_seen=510 last_return=-189.0 (+1 eps) [worker 0] 
episodes_seen=510 last_return=-257.7 (+1 eps) [worker 2] episodes_seen=510 last_return=-91.2 (+1 eps) [worker 3] episodes_seen=510 last_return=-122.6 (+1 eps) [worker 1] episodes_seen=520 last_return=-143.5 (+1 eps) [worker 2] episodes_seen=520 last_return=-75.8 (+1 eps) [worker 0] episodes_seen=520 last_return=-82.3 (+1 eps) [worker 3] episodes_seen=520 last_return=-93.4 (+1 eps) [worker 1] episodes_seen=530 last_return=-92.1 (+1 eps) [worker 2] episodes_seen=530 last_return=-119.5 (+1 eps) [worker 0] episodes_seen=530 last_return=-166.8 (+1 eps) [worker 3] episodes_seen=530 last_return=-141.1 (+1 eps) [worker 1] episodes_seen=540 last_return=-345.5 (+1 eps) [A2C][sync] it= 1669 steps= 200280 (+120) avg10=-120.22 loss=651.987 pg=-0.076 vf=1185.609 H=1.114 gn=3171.847 [worker 2] episodes_seen=540 last_return=-119.3 (+1 eps) [worker 0] episodes_seen=540 last_return=-137.5 (+1 eps) [worker 3] episodes_seen=540 last_return=-93.2 (+1 eps) [worker 1] episodes_seen=550 last_return=-156.7 (+1 eps) [worker 2] episodes_seen=550 last_return=-253.6 (+1 eps) [worker 0] episodes_seen=550 last_return=-116.6 (+1 eps) [worker 3] episodes_seen=550 last_return=-338.4 (+1 eps) [worker 1] episodes_seen=560 last_return=-309.9 (+1 eps) [worker 0] episodes_seen=560 last_return=-196.7 (+1 eps) [worker 2] episodes_seen=560 last_return=-190.4 (+1 eps) [worker 3] episodes_seen=560 last_return=-114.5 (+1 eps) [worker 1] episodes_seen=570 last_return=-109.5 (+1 eps) [worker 0] episodes_seen=570 last_return=-274.4 (+1 eps) [worker 2] episodes_seen=570 last_return=-121.9 (+1 eps) [worker 3] episodes_seen=570 last_return=-146.7 (+1 eps) [worker 1] episodes_seen=580 last_return=-232.4 (+1 eps) [worker 0] episodes_seen=580 last_return=-198.4 (+1 eps) [worker 2] episodes_seen=580 last_return=-222.8 (+1 eps) [worker 3] episodes_seen=580 last_return=-274.4 (+1 eps) [worker 1] episodes_seen=590 last_return=-198.7 (+1 eps) [worker 0] episodes_seen=590 last_return=-148.7 (+1 eps) [worker 2] 
episodes_seen=590 last_return=-266.3 (+1 eps) [worker 3] episodes_seen=590 last_return=-215.9 (+1 eps) [worker 1] episodes_seen=600 last_return=-175.4 (+1 eps) [worker 0] episodes_seen=600 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=600 last_return=-301.9 (+1 eps) [worker 3] episodes_seen=600 last_return=-179.1 (+1 eps) [worker 1] episodes_seen=610 last_return=-365.7 (+1 eps) [worker 0] episodes_seen=610 last_return=-283.0 (+1 eps) [worker 2] episodes_seen=610 last_return=-252.3 (+1 eps) [worker 3] episodes_seen=610 last_return=-312.3 (+1 eps) [worker 1] episodes_seen=620 last_return=-168.3 (+1 eps) [worker 0] episodes_seen=620 last_return=-138.2 (+1 eps) [worker 2] episodes_seen=620 last_return=-392.8 (+1 eps) [worker 3] episodes_seen=620 last_return=-229.0 (+1 eps) [worker 0] episodes_seen=630 last_return=-225.3 (+1 eps) [worker 1] episodes_seen=630 last_return=-170.1 (+1 eps) [worker 3] episodes_seen=630 last_return=-387.1 (+1 eps) [worker 2] episodes_seen=630 last_return=-138.1 (+1 eps) [worker 0] episodes_seen=640 last_return=-335.3 (+1 eps) [worker 1] episodes_seen=640 last_return=-127.5 (+1 eps) [worker 2] episodes_seen=640 last_return=-236.4 (+1 eps) [worker 3] episodes_seen=640 last_return=-206.7 (+1 eps) [worker 0] episodes_seen=650 last_return=-343.1 (+1 eps) [worker 1] episodes_seen=650 last_return=-117.1 (+1 eps) [worker 2] episodes_seen=650 last_return=-180.6 (+1 eps) [worker 3] episodes_seen=650 last_return=-93.4 (+1 eps) [worker 1] episodes_seen=660 last_return=-79.1 (+1 eps) [worker 0] episodes_seen=660 last_return=-168.9 (+1 eps) [worker 2] episodes_seen=660 last_return=-119.0 (+1 eps) [worker 3] episodes_seen=660 last_return=-124.0 (+1 eps) [worker 1] episodes_seen=670 last_return=-98.5 (+1 eps) [worker 0] episodes_seen=670 last_return=-213.1 (+1 eps) [worker 3] episodes_seen=670 last_return=-63.2 (+1 eps) [worker 2] episodes_seen=670 last_return=-92.9 (+1 eps) [worker 0] episodes_seen=680 last_return=-88.5 (+1 eps) [worker 1] 
episodes_seen=680 last_return=-102.0 (+1 eps) [worker 3] episodes_seen=680 last_return=-77.3 (+1 eps) [worker 2] episodes_seen=680 last_return=-92.2 (+1 eps) [A2C][sync] it= 2086 steps= 250320 (+120) avg10= -95.98 loss=980.529 pg=-0.381 vf=1783.502 H=0.812 gn=1222.064 [worker 1] episodes_seen=690 last_return=-101.2 (+1 eps) [worker 0] episodes_seen=690 last_return=-114.8 (+1 eps) [worker 3] episodes_seen=690 last_return=-83.8 (+1 eps) [worker 2] episodes_seen=690 last_return=-85.8 (+1 eps) [worker 0] episodes_seen=700 last_return=-114.1 (+1 eps) [worker 1] episodes_seen=700 last_return=-72.3 (+1 eps) [worker 3] episodes_seen=700 last_return=-78.0 (+1 eps) [worker 2] episodes_seen=700 last_return=-77.8 (+1 eps) [worker 1] episodes_seen=710 last_return=-132.0 (+1 eps) [worker 2] episodes_seen=710 last_return=-55.6 (+1 eps) [worker 3] episodes_seen=710 last_return=-95.9 (+1 eps) [worker 0] episodes_seen=710 last_return=-93.7 (+1 eps) [worker 2] episodes_seen=720 last_return=-255.7 (+1 eps) [worker 3] episodes_seen=720 last_return=-186.5 (+1 eps) [worker 1] episodes_seen=720 last_return=-197.1 (+1 eps) [worker 0] episodes_seen=720 last_return=-145.6 (+1 eps) [worker 3] episodes_seen=730 last_return=-129.8 (+1 eps) [worker 2] episodes_seen=730 last_return=-307.8 (+1 eps) [worker 1] episodes_seen=730 last_return=-149.8 (+1 eps) [worker 0] episodes_seen=730 last_return=-143.8 (+1 eps) [worker 2] episodes_seen=740 last_return=-258.9 (+1 eps) [worker 3] episodes_seen=740 last_return=-158.3 (+1 eps) [worker 1] episodes_seen=740 last_return=-110.0 (+1 eps) [worker 0] episodes_seen=740 last_return=-122.9 (+1 eps) [worker 2] episodes_seen=750 last_return=-83.4 (+1 eps) [worker 3] episodes_seen=750 last_return=-88.0 (+1 eps) [worker 1] episodes_seen=750 last_return=-86.2 (+1 eps) [worker 0] episodes_seen=750 last_return=-92.5 (+1 eps) [worker 2] episodes_seen=760 last_return=-70.5 (+1 eps) [worker 3] episodes_seen=760 last_return=-136.8 (+1 eps) [worker 1] episodes_seen=760 
last_return=-121.5 (+1 eps) [worker 0] episodes_seen=760 last_return=-60.5 (+1 eps) [worker 2] episodes_seen=770 last_return=-108.9 (+1 eps) [worker 3] episodes_seen=770 last_return=-122.9 (+1 eps) [worker 1] episodes_seen=770 last_return=-101.8 (+1 eps) [worker 0] episodes_seen=770 last_return=-169.6 (+1 eps) [worker 2] episodes_seen=780 last_return=-92.5 (+1 eps) [worker 3] episodes_seen=780 last_return=-110.7 (+1 eps) [worker 1] episodes_seen=780 last_return=-102.8 (+1 eps) [worker 0] episodes_seen=780 last_return=-98.9 (+1 eps) [worker 3] episodes_seen=790 last_return=-132.0 (+1 eps) [worker 2] episodes_seen=790 last_return=-95.2 (+1 eps) [worker 1] episodes_seen=790 last_return=-151.3 (+1 eps) [worker 0] episodes_seen=790 last_return=-156.3 (+1 eps) [worker 2] episodes_seen=800 last_return=-149.8 (+1 eps) [worker 3] episodes_seen=800 last_return=-432.1 (+1 eps) [worker 1] episodes_seen=800 last_return=-309.9 (+1 eps) [worker 3] episodes_seen=810 last_return=-286.6 (+1 eps) [worker 2] episodes_seen=810 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=800 last_return=-155.2 (+1 eps) [worker 2] episodes_seen=820 last_return=-297.1 (+1 eps) [worker 1] episodes_seen=810 last_return=-448.8 (+1 eps) [worker 3] episodes_seen=820 last_return=-207.0 (+1 eps) [worker 0] episodes_seen=810 last_return=-300.2 (+1 eps) [worker 2] episodes_seen=830 last_return=-213.4 (+1 eps) [worker 1] episodes_seen=820 last_return=-24.9 (+1 eps) [worker 3] episodes_seen=830 last_return=-447.1 (+1 eps) [worker 0] episodes_seen=820 last_return=-371.2 (+1 eps) [worker 2] episodes_seen=840 last_return=-149.3 (+1 eps) [worker 1] episodes_seen=830 last_return=-92.7 (+1 eps) [worker 0] episodes_seen=830 last_return=-227.2 (+1 eps) [worker 3] episodes_seen=840 last_return=-78.5 (+1 eps) [worker 2] episodes_seen=850 last_return=-93.0 (+1 eps) [worker 1] episodes_seen=840 last_return=-46.3 (+1 eps) [worker 3] episodes_seen=850 last_return=-90.9 (+1 eps) [worker 0] episodes_seen=840 
last_return=-122.0 (+1 eps) [A2C][sync] it= 2503 steps= 300360 (+120) avg10= -86.42 loss=585.067 pg=-0.186 vf=1064.142 H=1.259 gn=2677.528 [worker 2] episodes_seen=860 last_return=-75.5 (+1 eps) [worker 1] episodes_seen=850 last_return=-158.1 (+1 eps) [worker 3] episodes_seen=860 last_return=-106.8 (+1 eps) [worker 0] episodes_seen=850 last_return=-183.0 (+1 eps) [worker 2] episodes_seen=870 last_return=-123.9 (+1 eps) [worker 1] episodes_seen=860 last_return=-123.0 (+1 eps) [worker 3] episodes_seen=870 last_return=-124.7 (+1 eps) [worker 0] episodes_seen=860 last_return=-113.8 (+1 eps) [worker 2] episodes_seen=880 last_return=-117.5 (+1 eps) [worker 1] episodes_seen=870 last_return=-296.7 (+1 eps) [worker 3] episodes_seen=880 last_return=-198.7 (+1 eps) [worker 0] episodes_seen=870 last_return=-181.8 (+1 eps) [worker 2] episodes_seen=890 last_return=-178.6 (+1 eps) [worker 1] episodes_seen=880 last_return=-166.6 (+1 eps) [worker 3] episodes_seen=890 last_return=-87.7 (+1 eps) [worker 0] episodes_seen=880 last_return=-81.1 (+1 eps) [worker 2] episodes_seen=900 last_return=-101.0 (+1 eps) [worker 1] episodes_seen=890 last_return=-289.9 (+1 eps) [worker 3] episodes_seen=900 last_return=-79.2 (+1 eps) [worker 0] episodes_seen=890 last_return=-243.6 (+1 eps) [worker 2] episodes_seen=910 last_return=-115.7 (+1 eps) [worker 1] episodes_seen=900 last_return=-235.4 (+1 eps) [worker 3] episodes_seen=910 last_return=-443.5 (+1 eps) [worker 0] episodes_seen=900 last_return=-234.6 (+1 eps) [worker 2] episodes_seen=920 last_return=-19.5 (+1 eps) [worker 1] episodes_seen=910 last_return=-198.5 (+1 eps) [worker 3] episodes_seen=920 last_return=-220.5 (+1 eps) [worker 0] episodes_seen=910 last_return=-36.7 (+1 eps) [worker 2] episodes_seen=930 last_return=-140.0 (+1 eps) [worker 1] episodes_seen=920 last_return=-220.9 (+1 eps) [worker 3] episodes_seen=930 last_return=-196.2 (+1 eps) [worker 0] episodes_seen=920 last_return=-91.8 (+1 eps) [worker 2] episodes_seen=940 
last_return=-88.5 (+1 eps) [worker 1] episodes_seen=930 last_return=-73.1 (+1 eps) [worker 3] episodes_seen=940 last_return=-101.7 (+1 eps) [worker 0] episodes_seen=930 last_return=30.9 (+1 eps) [worker 2] episodes_seen=950 last_return=-115.7 (+1 eps) [worker 1] episodes_seen=940 last_return=-89.0 (+1 eps) [worker 3] episodes_seen=950 last_return=-86.1 (+1 eps) [worker 0] episodes_seen=940 last_return=-151.8 (+1 eps) [worker 2] episodes_seen=960 last_return=-81.4 (+1 eps) [worker 1] episodes_seen=950 last_return=-92.9 (+1 eps) [worker 3] episodes_seen=960 last_return=-98.5 (+1 eps) [worker 0] episodes_seen=950 last_return=83.9 (+1 eps) [worker 2] episodes_seen=970 last_return=-78.4 (+1 eps) [worker 1] episodes_seen=960 last_return=-92.2 (+1 eps) [worker 3] episodes_seen=970 last_return=-202.9 (+1 eps) [worker 0] episodes_seen=960 last_return=-98.4 (+1 eps) [worker 2] episodes_seen=980 last_return=-150.5 (+1 eps) [worker 1] episodes_seen=970 last_return=-147.1 (+1 eps) [worker 3] episodes_seen=980 last_return=-97.4 (+1 eps) [worker 0] episodes_seen=970 last_return=-112.7 (+1 eps) [worker 2] episodes_seen=990 last_return=-87.6 (+1 eps) [worker 1] episodes_seen=980 last_return=-107.6 (+1 eps) [worker 3] episodes_seen=990 last_return=81.1 (+1 eps) [worker 0] episodes_seen=980 last_return=-97.9 (+1 eps) [worker 1] episodes_seen=990 last_return=-319.0 (+1 eps) [worker 2] episodes_seen=1000 last_return=-99.2 (+1 eps) [worker 3] episodes_seen=1000 last_return=-161.2 (+1 eps) [worker 0] episodes_seen=990 last_return=-83.8 (+1 eps) [A2C][sync] it= 2920 steps= 350400 (+120) avg10=-106.28 loss=4328.506 pg=-0.061 vf=7870.161 H=1.083 gn=2779.813 [worker 1] episodes_seen=1000 last_return=-78.3 (+1 eps) [worker 2] episodes_seen=1010 last_return=-43.6 (+1 eps) [worker 3] episodes_seen=1010 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1000 last_return=-175.3 (+1 eps) [worker 1] episodes_seen=1010 last_return=-75.5 (+1 eps) [worker 2] episodes_seen=1020 last_return=-117.7 (+1 
eps) [worker 3] episodes_seen=1020 last_return=-109.0 (+1 eps) [worker 0] episodes_seen=1010 last_return=-199.5 (+1 eps) [worker 1] episodes_seen=1020 last_return=-91.3 (+1 eps) [worker 2] episodes_seen=1030 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=1030 last_return=-129.8 (+1 eps) [worker 0] episodes_seen=1020 last_return=-256.2 (+1 eps) [worker 1] episodes_seen=1030 last_return=-266.0 (+1 eps) [worker 2] episodes_seen=1040 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=1040 last_return=-85.5 (+1 eps) [worker 0] episodes_seen=1030 last_return=-66.1 (+1 eps) [worker 1] episodes_seen=1040 last_return=-228.3 (+1 eps) [worker 2] episodes_seen=1050 last_return=-129.8 (+1 eps) [worker 3] episodes_seen=1050 last_return=-349.8 (+1 eps) [worker 0] episodes_seen=1040 last_return=-340.6 (+1 eps) [worker 1] episodes_seen=1050 last_return=-135.6 (+1 eps) [worker 2] episodes_seen=1060 last_return=-381.6 (+1 eps) [worker 3] episodes_seen=1060 last_return=-176.9 (+1 eps) [worker 0] episodes_seen=1050 last_return=-156.5 (+1 eps) [worker 2] episodes_seen=1070 last_return=-164.5 (+1 eps) [worker 1] episodes_seen=1060 last_return=-210.8 (+1 eps) [worker 3] episodes_seen=1070 last_return=-115.2 (+1 eps) [worker 0] episodes_seen=1060 last_return=-216.5 (+1 eps) [worker 1] episodes_seen=1070 last_return=-84.7 (+1 eps) [worker 2] episodes_seen=1080 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=1080 last_return=-108.9 (+1 eps) [worker 0] episodes_seen=1070 last_return=-98.9 (+1 eps) [worker 2] episodes_seen=1090 last_return=-398.2 (+1 eps) [worker 1] episodes_seen=1080 last_return=-160.7 (+1 eps) [worker 3] episodes_seen=1090 last_return=-377.6 (+1 eps) [worker 0] episodes_seen=1080 last_return=-217.1 (+1 eps) [worker 2] episodes_seen=1100 last_return=-93.1 (+1 eps) [worker 1] episodes_seen=1090 last_return=-99.9 (+1 eps) [worker 3] episodes_seen=1100 last_return=-72.5 (+1 eps) [worker 0] episodes_seen=1090 last_return=-81.8 (+1 eps) [worker 2] episodes_seen=1110 
last_return=-181.8 (+1 eps) [worker 1] episodes_seen=1100 last_return=-88.5 (+1 eps) [worker 0] episodes_seen=1100 last_return=-202.0 (+1 eps) [worker 3] episodes_seen=1110 last_return=-214.2 (+1 eps) [worker 2] episodes_seen=1120 last_return=64.8 (+1 eps) [worker 1] episodes_seen=1110 last_return=-77.7 (+1 eps) [worker 0] episodes_seen=1110 last_return=-164.6 (+1 eps) [worker 3] episodes_seen=1120 last_return=-58.8 (+1 eps) [worker 2] episodes_seen=1130 last_return=-185.6 (+1 eps) [worker 1] episodes_seen=1120 last_return=-185.9 (+1 eps) [worker 0] episodes_seen=1120 last_return=-112.4 (+1 eps) [worker 3] episodes_seen=1130 last_return=-87.4 (+1 eps) [A2C][sync] it= 3337 steps= 400440 (+120) avg10= -78.64 loss=619.187 pg=0.031 vf=1125.781 H=1.221 gn=759.370 [worker 2] episodes_seen=1140 last_return=-85.0 (+1 eps) [worker 1] episodes_seen=1130 last_return=-57.4 (+1 eps) [worker 0] episodes_seen=1130 last_return=-131.7 (+1 eps) [worker 3] episodes_seen=1140 last_return=-93.3 (+1 eps) [worker 2] episodes_seen=1150 last_return=-150.3 (+1 eps) [worker 1] episodes_seen=1140 last_return=-106.8 (+1 eps) [worker 0] episodes_seen=1140 last_return=-95.8 (+1 eps) [worker 3] episodes_seen=1150 last_return=-125.7 (+1 eps) [worker 2] episodes_seen=1160 last_return=-116.8 (+1 eps) [worker 1] episodes_seen=1150 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1150 last_return=-115.2 (+1 eps) [worker 2] episodes_seen=1170 last_return=-104.1 (+1 eps) [worker 3] episodes_seen=1160 last_return=-88.4 (+1 eps) [worker 1] episodes_seen=1160 last_return=-62.9 (+1 eps) [worker 0] episodes_seen=1160 last_return=-202.4 (+1 eps) [worker 2] episodes_seen=1180 last_return=-80.6 (+1 eps) [worker 3] episodes_seen=1170 last_return=-90.3 (+1 eps) [worker 1] episodes_seen=1170 last_return=-74.0 (+1 eps) [worker 0] episodes_seen=1170 last_return=-86.4 (+1 eps) [worker 3] episodes_seen=1180 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1190 last_return=-186.6 (+1 eps) [worker 1] 
episodes_seen=1180 last_return=-84.4 (+1 eps) [worker 0] episodes_seen=1180 last_return=-114.2 (+1 eps) [worker 3] episodes_seen=1190 last_return=-99.1 (+1 eps) [worker 2] episodes_seen=1200 last_return=-221.5 (+1 eps) [worker 1] episodes_seen=1190 last_return=-101.7 (+1 eps) [worker 0] episodes_seen=1190 last_return=-98.9 (+1 eps) [worker 3] episodes_seen=1200 last_return=-30.6 (+1 eps) [worker 2] episodes_seen=1210 last_return=-113.7 (+1 eps) [worker 1] episodes_seen=1200 last_return=-108.5 (+1 eps) [worker 0] episodes_seen=1200 last_return=-168.3 (+1 eps) [worker 3] episodes_seen=1210 last_return=-441.1 (+1 eps) [worker 2] episodes_seen=1220 last_return=-430.4 (+1 eps) [worker 1] episodes_seen=1210 last_return=-299.6 (+1 eps) [worker 0] episodes_seen=1210 last_return=-147.7 (+1 eps) [worker 2] episodes_seen=1230 last_return=-287.5 (+1 eps) [worker 3] episodes_seen=1220 last_return=-400.1 (+1 eps) [worker 1] episodes_seen=1220 last_return=-315.5 (+1 eps) [worker 0] episodes_seen=1220 last_return=-152.9 (+1 eps) [worker 3] episodes_seen=1230 last_return=-331.0 (+1 eps) [worker 2] episodes_seen=1240 last_return=20.7 (+1 eps) [worker 1] episodes_seen=1230 last_return=-362.6 (+1 eps) [worker 0] episodes_seen=1230 last_return=-319.0 (+1 eps) [worker 1] episodes_seen=1240 last_return=-150.8 (+1 eps) [worker 2] episodes_seen=1250 last_return=-271.3 (+1 eps) [worker 3] episodes_seen=1240 last_return=-67.8 (+1 eps) [worker 0] episodes_seen=1240 last_return=-139.4 (+1 eps) [worker 1] episodes_seen=1250 last_return=-190.7 (+1 eps) [worker 3] episodes_seen=1250 last_return=-264.9 (+1 eps) [worker 2] episodes_seen=1260 last_return=-30.8 (+1 eps) [worker 0] episodes_seen=1250 last_return=-150.6 (+1 eps) [worker 1] episodes_seen=1260 last_return=-116.3 (+1 eps) [worker 3] episodes_seen=1260 last_return=-186.6 (+1 eps) [worker 2] episodes_seen=1270 last_return=-152.4 (+1 eps) [worker 1] episodes_seen=1270 last_return=-330.8 (+1 eps) [worker 0] episodes_seen=1260 
last_return=-100.0 (+1 eps) [worker 3] episodes_seen=1270 last_return=-188.4 (+1 eps) [worker 2] episodes_seen=1280 last_return=-394.2 (+1 eps) [A2C][sync] it= 3754 steps= 450480 (+120) avg10=-164.82 loss=14462.084 pg=-0.016 vf=26294.748 H=0.585 gn=26869.215 [worker 0] episodes_seen=1270 last_return=-207.2 (+1 eps) [worker 1] episodes_seen=1280 last_return=-313.0 (+1 eps) [worker 3] episodes_seen=1280 last_return=-381.7 (+1 eps) [worker 2] episodes_seen=1290 last_return=-122.8 (+1 eps) [worker 0] episodes_seen=1280 last_return=-111.2 (+1 eps) [worker 1] episodes_seen=1290 last_return=-144.6 (+1 eps) [worker 3] episodes_seen=1290 last_return=-256.1 (+1 eps) [worker 2] episodes_seen=1300 last_return=-268.4 (+1 eps) [worker 0] episodes_seen=1290 last_return=-296.7 (+1 eps) [worker 3] episodes_seen=1300 last_return=-368.9 (+1 eps) [worker 1] episodes_seen=1300 last_return=-193.6 (+1 eps) [worker 2] episodes_seen=1310 last_return=-309.8 (+1 eps) [worker 0] episodes_seen=1300 last_return=-398.9 (+1 eps) [worker 1] episodes_seen=1310 last_return=-250.5 (+1 eps) [worker 2] episodes_seen=1320 last_return=-274.2 (+1 eps) [worker 3] episodes_seen=1310 last_return=-316.1 (+1 eps) [worker 1] episodes_seen=1320 last_return=-73.9 (+1 eps) [worker 0] episodes_seen=1310 last_return=-146.6 (+1 eps) [worker 2] episodes_seen=1330 last_return=-279.6 (+1 eps) [worker 3] episodes_seen=1320 last_return=-242.8 (+1 eps) [worker 0] episodes_seen=1320 last_return=-91.7 (+1 eps) [worker 1] episodes_seen=1330 last_return=-69.7 (+1 eps) [worker 2] episodes_seen=1340 last_return=-93.6 (+1 eps) [worker 3] episodes_seen=1330 last_return=-154.1 (+1 eps) [worker 0] episodes_seen=1330 last_return=-129.9 (+1 eps) [worker 1] episodes_seen=1340 last_return=-110.4 (+1 eps) [worker 2] episodes_seen=1350 last_return=-97.8 (+1 eps) [worker 3] episodes_seen=1340 last_return=-185.8 (+1 eps) [worker 1] episodes_seen=1350 last_return=-114.0 (+1 eps) [worker 0] episodes_seen=1340 last_return=-92.5 (+1 eps) 
[worker 2] episodes_seen=1360 last_return=-98.5 (+1 eps) [worker 3] episodes_seen=1350 last_return=-91.5 (+1 eps) [worker 2] episodes_seen=1370 last_return=-102.3 (+1 eps) [worker 1] episodes_seen=1360 last_return=-126.9 (+1 eps) [worker 0] episodes_seen=1350 last_return=-114.0 (+1 eps) [worker 3] episodes_seen=1360 last_return=-163.1 (+1 eps) [worker 1] episodes_seen=1370 last_return=-110.9 (+1 eps) [worker 2] episodes_seen=1380 last_return=-125.6 (+1 eps) [worker 0] episodes_seen=1360 last_return=-92.9 (+1 eps) [worker 3] episodes_seen=1370 last_return=-135.2 (+1 eps) [worker 2] episodes_seen=1390 last_return=-76.7 (+1 eps) [worker 0] episodes_seen=1370 last_return=49.6 (+1 eps) [worker 1] episodes_seen=1380 last_return=-102.6 (+1 eps) [worker 3] episodes_seen=1380 last_return=-105.6 (+1 eps) [worker 2] episodes_seen=1400 last_return=-140.1 (+1 eps) [worker 3] episodes_seen=1390 last_return=-239.7 (+1 eps) [worker 1] episodes_seen=1390 last_return=-274.8 (+1 eps) [worker 0] episodes_seen=1380 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1410 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1390 last_return=-175.0 (+1 eps) [worker 1] episodes_seen=1400 last_return=-120.6 (+1 eps) [worker 3] episodes_seen=1400 last_return=-214.2 (+1 eps) [worker 2] episodes_seen=1420 last_return=-94.0 (+1 eps) [worker 0] episodes_seen=1400 last_return=-122.2 (+1 eps) [worker 1] episodes_seen=1410 last_return=-74.6 (+1 eps) [worker 3] episodes_seen=1410 last_return=-134.4 (+1 eps) [worker 2] episodes_seen=1430 last_return=-180.3 (+1 eps) [A2C][sync] it= 4171 steps= 500520 (+120) avg10=-177.25 loss=1598.945 pg=0.015 vf=2907.190 H=1.244 gn=2803.794 [worker 0] episodes_seen=1410 last_return=-117.4 (+1 eps) [worker 1] episodes_seen=1420 last_return=-49.6 (+1 eps) [worker 3] episodes_seen=1420 last_return=-95.5 (+1 eps) [worker 2] episodes_seen=1440 last_return=-144.1 (+1 eps) [worker 0] episodes_seen=1420 last_return=-159.2 (+1 eps) [worker 3] episodes_seen=1430 
last_return=-236.7 (+1 eps) [worker 1] episodes_seen=1430 last_return=-276.8 (+1 eps) [worker 2] episodes_seen=1450 last_return=-183.3 (+1 eps) [worker 0] episodes_seen=1430 last_return=-242.7 (+1 eps) [worker 3] episodes_seen=1440 last_return=-110.2 (+1 eps) [worker 1] episodes_seen=1440 last_return=-361.1 (+1 eps) [worker 2] episodes_seen=1460 last_return=-94.6 (+1 eps) [worker 0] episodes_seen=1440 last_return=-82.5 (+1 eps) [worker 3] episodes_seen=1450 last_return=-68.1 (+1 eps) [worker 1] episodes_seen=1450 last_return=-101.8 (+1 eps) [worker 2] episodes_seen=1470 last_return=-85.3 (+1 eps) [worker 0] episodes_seen=1450 last_return=-180.0 (+1 eps) [worker 3] episodes_seen=1460 last_return=-20.5 (+1 eps) [worker 1] episodes_seen=1460 last_return=-93.8 (+1 eps) [worker 2] episodes_seen=1480 last_return=-123.1 (+1 eps) [worker 3] episodes_seen=1470 last_return=-94.0 (+1 eps) [worker 0] episodes_seen=1460 last_return=-21.8 (+1 eps) [worker 1] episodes_seen=1470 last_return=-85.0 (+1 eps) [worker 3] episodes_seen=1480 last_return=-89.8 (+1 eps) [worker 2] episodes_seen=1490 last_return=-109.8 (+1 eps) [worker 0] episodes_seen=1470 last_return=-188.7 (+1 eps) [worker 1] episodes_seen=1480 last_return=-222.2 (+1 eps) [worker 3] episodes_seen=1490 last_return=-94.3 (+1 eps) [worker 0] episodes_seen=1480 last_return=-172.8 (+1 eps) [worker 1] episodes_seen=1490 last_return=-108.0 (+1 eps) [worker 2] episodes_seen=1500 last_return=-15.5 (+1 eps) [worker 3] episodes_seen=1500 last_return=-84.8 (+1 eps) [worker 0] episodes_seen=1490 last_return=-114.9 (+1 eps) [worker 1] episodes_seen=1500 last_return=-71.3 (+1 eps) [worker 2] episodes_seen=1510 last_return=-200.2 (+1 eps) [worker 3] episodes_seen=1510 last_return=-12.4 (+1 eps) [worker 0] episodes_seen=1500 last_return=-125.1 (+1 eps) [worker 1] episodes_seen=1510 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=1520 last_return=-81.6 (+1 eps) [worker 2] episodes_seen=1520 last_return=-125.0 (+1 eps) [worker 0] 
episodes_seen=1510 last_return=-57.6 (+1 eps) [worker 1] episodes_seen=1520 last_return=-93.5 (+1 eps) [worker 2] episodes_seen=1530 last_return=-367.9 (+1 eps) [worker 3] episodes_seen=1530 last_return=-110.4 (+1 eps) [worker 0] episodes_seen=1520 last_return=65.1 (+1 eps) [worker 1] episodes_seen=1530 last_return=-60.5 (+1 eps) [worker 3] episodes_seen=1540 last_return=-90.0 (+1 eps) [worker 2] episodes_seen=1540 last_return=-32.0 (+1 eps) [worker 0] episodes_seen=1530 last_return=-67.1 (+1 eps) [worker 1] episodes_seen=1540 last_return=-428.2 (+1 eps) [worker 3] episodes_seen=1550 last_return=-288.0 (+1 eps) [worker 2] episodes_seen=1550 last_return=-250.4 (+1 eps) [A2C][sync] it= 4588 steps= 550560 (+120) avg10=-190.28 loss=3601.199 pg=-0.215 vf=6548.057 H=0.842 gn=2616.828 [worker 0] episodes_seen=1540 last_return=-233.7 (+1 eps) [worker 3] episodes_seen=1560 last_return=-28.8 (+1 eps) [worker 1] episodes_seen=1550 last_return=-117.9 (+1 eps) [worker 2] episodes_seen=1560 last_return=-199.5 (+1 eps) [worker 0] episodes_seen=1550 last_return=-104.5 (+1 eps) [worker 1] episodes_seen=1560 last_return=-144.0 (+1 eps) [worker 3] episodes_seen=1570 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1570 last_return=-126.1 (+1 eps) [worker 0] episodes_seen=1560 last_return=-52.2 (+1 eps) [worker 1] episodes_seen=1570 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=1580 last_return=-97.6 (+1 eps) [worker 2] episodes_seen=1580 last_return=-117.6 (+1 eps) [worker 0] episodes_seen=1570 last_return=-131.6 (+1 eps) [worker 3] episodes_seen=1590 last_return=-265.8 (+1 eps) [worker 1] episodes_seen=1580 last_return=-209.2 (+1 eps) [worker 2] episodes_seen=1590 last_return=-122.0 (+1 eps) [worker 0] episodes_seen=1580 last_return=-18.2 (+1 eps) [worker 1] episodes_seen=1590 last_return=-80.4 (+1 eps) [worker 3] episodes_seen=1600 last_return=-74.2 (+1 eps) [worker 2] episodes_seen=1600 last_return=-141.8 (+1 eps) [worker 3] episodes_seen=1610 last_return=-79.8 (+1 eps) 
[worker 0] episodes_seen=1590 last_return=-79.4 (+1 eps) [worker 1] episodes_seen=1600 last_return=-69.4 (+1 eps) [worker 2] episodes_seen=1610 last_return=-133.0 (+1 eps) [worker 1] episodes_seen=1610 last_return=-331.4 (+1 eps) [worker 3] episodes_seen=1620 last_return=-240.9 (+1 eps) [worker 0] episodes_seen=1600 last_return=-142.3 (+1 eps) [worker 2] episodes_seen=1620 last_return=-307.8 (+1 eps) [worker 0] episodes_seen=1610 last_return=-334.8 (+1 eps) [worker 3] episodes_seen=1630 last_return=-487.3 (+1 eps) [worker 1] episodes_seen=1620 last_return=-333.3 (+1 eps) [worker 2] episodes_seen=1630 last_return=-163.1 (+1 eps) [worker 0] episodes_seen=1620 last_return=-161.1 (+1 eps) [worker 3] episodes_seen=1640 last_return=-260.7 (+1 eps) [worker 1] episodes_seen=1630 last_return=-198.3 (+1 eps) [worker 2] episodes_seen=1640 last_return=-254.8 (+1 eps) [worker 0] episodes_seen=1630 last_return=-210.1 (+1 eps) [worker 3] episodes_seen=1650 last_return=-437.5 (+1 eps) [worker 1] episodes_seen=1640 last_return=-385.1 (+1 eps) [worker 2] episodes_seen=1650 last_return=-72.9 (+1 eps) [worker 0] episodes_seen=1640 last_return=-210.2 (+1 eps) [worker 3] episodes_seen=1660 last_return=-197.4 (+1 eps) [worker 1] episodes_seen=1650 last_return=-457.2 (+1 eps) [worker 2] episodes_seen=1660 last_return=-339.4 (+1 eps) [worker 3] episodes_seen=1670 last_return=-361.1 (+1 eps) [worker 0] episodes_seen=1650 last_return=-389.3 (+1 eps) [worker 1] episodes_seen=1660 last_return=-167.3 (+1 eps) [worker 2] episodes_seen=1670 last_return=-377.0 (+1 eps) [worker 0] episodes_seen=1660 last_return=-353.2 (+1 eps) [worker 3] episodes_seen=1680 last_return=-259.8 (+1 eps) [worker 1] episodes_seen=1670 last_return=-322.4 (+1 eps) [worker 2] episodes_seen=1680 last_return=-114.1 (+1 eps) [worker 0] episodes_seen=1670 last_return=-308.5 (+1 eps) [worker 3] episodes_seen=1690 last_return=-349.0 (+1 eps) [worker 1] episodes_seen=1680 last_return=-370.7 (+1 eps) [A2C][sync] it= 5005 steps= 
600600 (+120) avg10=-220.81 loss=2631.936 pg=0.028 vf=4785.306 H=0.547 gn=11369.632 [worker 2] episodes_seen=1690 last_return=-200.5 (+1 eps) [worker 0] episodes_seen=1680 last_return=-239.9 (+1 eps) [worker 3] episodes_seen=1700 last_return=-328.8 (+1 eps) [worker 1] episodes_seen=1690 last_return=-114.8 (+1 eps) [worker 2] episodes_seen=1700 last_return=-253.8 (+1 eps) [worker 0] episodes_seen=1690 last_return=-255.4 (+1 eps) [worker 3] episodes_seen=1710 last_return=-141.6 (+1 eps) [worker 1] episodes_seen=1700 last_return=-327.9 (+1 eps) [worker 2] episodes_seen=1710 last_return=-292.2 (+1 eps) [worker 0] episodes_seen=1700 last_return=-214.7 (+1 eps) [worker 3] episodes_seen=1720 last_return=-179.2 (+1 eps) [worker 1] episodes_seen=1710 last_return=31.1 (+1 eps) [worker 2] episodes_seen=1720 last_return=-240.1 (+1 eps) [worker 0] episodes_seen=1710 last_return=-328.5 (+1 eps) [worker 3] episodes_seen=1730 last_return=-313.4 (+1 eps) [worker 1] episodes_seen=1720 last_return=-445.4 (+1 eps) [worker 2] episodes_seen=1730 last_return=-242.8 (+1 eps) [worker 0] episodes_seen=1720 last_return=-172.1 (+1 eps) [worker 3] episodes_seen=1740 last_return=-349.7 (+1 eps) [worker 1] episodes_seen=1730 last_return=-278.4 (+1 eps) [worker 2] episodes_seen=1740 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1730 last_return=-108.4 (+1 eps) [worker 3] episodes_seen=1750 last_return=-268.3 (+1 eps) [worker 1] episodes_seen=1740 last_return=-315.3 (+1 eps) [worker 2] episodes_seen=1750 last_return=-362.8 (+1 eps) [worker 0] episodes_seen=1740 last_return=-248.2 (+1 eps) [worker 3] episodes_seen=1760 last_return=-297.7 (+1 eps) [worker 2] episodes_seen=1760 last_return=-269.2 (+1 eps) [worker 1] episodes_seen=1750 last_return=-236.6 (+1 eps) [worker 0] episodes_seen=1750 last_return=-322.1 (+1 eps) [worker 3] episodes_seen=1770 last_return=-224.6 (+1 eps) [worker 1] episodes_seen=1760 last_return=-113.1 (+1 eps) [worker 2] episodes_seen=1770 last_return=-225.2 (+1 eps) 
[worker 0] episodes_seen=1760 last_return=-119.5 (+1 eps) [worker 3] episodes_seen=1780 last_return=-138.0 (+1 eps) [worker 1] episodes_seen=1770 last_return=-200.3 (+1 eps) [worker 2] episodes_seen=1780 last_return=-252.1 (+1 eps) [worker 0] episodes_seen=1770 last_return=-126.9 (+1 eps) [worker 3] episodes_seen=1790 last_return=-142.9 (+1 eps) [worker 1] episodes_seen=1780 last_return=-303.2 (+1 eps) [worker 2] episodes_seen=1790 last_return=-177.6 (+1 eps) [worker 0] episodes_seen=1780 last_return=-103.4 (+1 eps) [worker 3] episodes_seen=1800 last_return=-102.6 (+1 eps) [worker 1] episodes_seen=1790 last_return=-103.7 (+1 eps) [worker 2] episodes_seen=1800 last_return=-87.5 (+1 eps) [worker 0] episodes_seen=1790 last_return=-94.3 (+1 eps) [worker 3] episodes_seen=1810 last_return=-67.2 (+1 eps) [worker 2] episodes_seen=1810 last_return=-84.4 (+1 eps) [worker 1] episodes_seen=1800 last_return=-25.3 (+1 eps) [worker 0] episodes_seen=1800 last_return=-248.0 (+1 eps) [worker 3] episodes_seen=1820 last_return=-105.0 (+1 eps) [worker 2] episodes_seen=1820 last_return=-112.8 (+1 eps) [worker 1] episodes_seen=1810 last_return=-100.1 (+1 eps) [A2C][sync] it= 5422 steps= 650640 (+120) avg10= -72.57 loss=479.815 pg=-0.052 vf=872.526 H=1.117 gn=940.143 [worker 0] episodes_seen=1810 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1830 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1820 last_return=-94.3 (+1 eps) [worker 0] episodes_seen=1820 last_return=-132.9 (+1 eps) [worker 3] episodes_seen=1830 last_return=-96.9 (+1 eps) [worker 2] episodes_seen=1840 last_return=-107.0 (+1 eps) [worker 1] episodes_seen=1830 last_return=-100.7 (+1 eps) [worker 0] episodes_seen=1830 last_return=-81.7 (+1 eps) [worker 3] episodes_seen=1840 last_return=-151.0 (+1 eps) [worker 2] episodes_seen=1850 last_return=-169.3 (+1 eps) [worker 1] episodes_seen=1840 last_return=-161.8 (+1 eps) [worker 0] episodes_seen=1840 last_return=-191.5 (+1 eps) [worker 3] episodes_seen=1850 
last_return=-204.6 (+1 eps) [worker 2] episodes_seen=1860 last_return=-79.8 (+1 eps) [worker 1] episodes_seen=1850 last_return=-120.2 (+1 eps) [worker 0] episodes_seen=1850 last_return=-136.1 (+1 eps) [worker 3] episodes_seen=1860 last_return=-115.0 (+1 eps) [worker 2] episodes_seen=1870 last_return=-158.3 (+1 eps) [worker 0] episodes_seen=1860 last_return=-97.9 (+1 eps) [worker 1] episodes_seen=1860 last_return=-91.8 (+1 eps) [worker 3] episodes_seen=1870 last_return=-112.7 (+1 eps) [worker 2] episodes_seen=1880 last_return=-102.5 (+1 eps) [worker 1] episodes_seen=1870 last_return=-78.0 (+1 eps) [worker 0] episodes_seen=1870 last_return=-161.7 (+1 eps) [worker 3] episodes_seen=1880 last_return=-87.5 (+1 eps) [worker 2] episodes_seen=1890 last_return=-69.1 (+1 eps) [worker 0] episodes_seen=1880 last_return=-102.2 (+1 eps) [worker 1] episodes_seen=1880 last_return=-84.9 (+1 eps) [worker 3] episodes_seen=1890 last_return=-119.6 (+1 eps) [worker 2] episodes_seen=1900 last_return=-57.9 (+1 eps) [worker 0] episodes_seen=1890 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1890 last_return=-82.5 (+1 eps) [worker 3] episodes_seen=1900 last_return=-121.1 (+1 eps) [worker 2] episodes_seen=1910 last_return=-225.9 (+1 eps) [worker 0] episodes_seen=1900 last_return=-6.1 (+1 eps) [worker 1] episodes_seen=1900 last_return=-274.3 (+1 eps) [worker 3] episodes_seen=1910 last_return=-123.3 (+1 eps) [worker 2] episodes_seen=1920 last_return=-164.1 (+1 eps) [worker 0] episodes_seen=1910 last_return=-100.2 (+1 eps) [worker 1] episodes_seen=1910 last_return=-71.1 (+1 eps) [worker 3] episodes_seen=1920 last_return=-149.9 (+1 eps) [worker 2] episodes_seen=1930 last_return=-75.8 (+1 eps) [worker 0] episodes_seen=1920 last_return=-269.0 (+1 eps) [worker 1] episodes_seen=1920 last_return=-110.9 (+1 eps) [worker 3] episodes_seen=1930 last_return=-262.4 (+1 eps) [worker 2] episodes_seen=1940 last_return=-297.2 (+1 eps) [worker 0] episodes_seen=1930 last_return=-183.8 (+1 eps) [worker 1] 
episodes_seen=1930 last_return=-204.6 (+1 eps) [worker 3] episodes_seen=1940 last_return=-336.2 (+1 eps) [worker 2] episodes_seen=1950 last_return=-281.3 (+1 eps) [A2C][sync] it= 5839 steps= 700680 (+120) avg10=-197.92 loss=4870.306 pg=0.221 vf=8854.729 H=0.842 gn=9200.188 [worker 0] episodes_seen=1940 last_return=-169.9 (+1 eps) [worker 1] episodes_seen=1940 last_return=-162.0 (+1 eps) [worker 3] episodes_seen=1950 last_return=-93.3 (+1 eps) [worker 2] episodes_seen=1960 last_return=-95.9 (+1 eps) [worker 0] episodes_seen=1950 last_return=-95.9 (+1 eps) [worker 1] episodes_seen=1950 last_return=-65.7 (+1 eps) [worker 3] episodes_seen=1960 last_return=-17.9 (+1 eps) [worker 2] episodes_seen=1970 last_return=-91.6 (+1 eps) [worker 0] episodes_seen=1960 last_return=-50.9 (+1 eps) [worker 1] episodes_seen=1960 last_return=8.6 (+1 eps) [worker 3] episodes_seen=1970 last_return=58.7 (+1 eps) [worker 2] episodes_seen=1980 last_return=-88.0 (+1 eps) [worker 0] episodes_seen=1970 last_return=-68.8 (+1 eps) [worker 1] episodes_seen=1970 last_return=-119.5 (+1 eps) [worker 3] episodes_seen=1980 last_return=-66.7 (+1 eps) [worker 2] episodes_seen=1990 last_return=-117.5 (+1 eps) [worker 1] episodes_seen=1980 last_return=-87.2 (+1 eps) [worker 0] episodes_seen=1980 last_return=-95.0 (+1 eps) [worker 3] episodes_seen=1990 last_return=-121.6 (+1 eps) [worker 2] episodes_seen=2000 last_return=-81.2 (+1 eps) [worker 1] episodes_seen=1990 last_return=-99.6 (+1 eps) [worker 0] episodes_seen=1990 last_return=-81.7 (+1 eps) [worker 3] episodes_seen=2000 last_return=52.5 (+1 eps) [worker 2] episodes_seen=2010 last_return=-165.6 (+1 eps) [worker 1] episodes_seen=2000 last_return=-82.7 (+1 eps) [worker 0] episodes_seen=2000 last_return=-94.9 (+1 eps) [worker 3] episodes_seen=2010 last_return=-100.1 (+1 eps) [worker 2] episodes_seen=2020 last_return=-44.5 (+1 eps) [worker 1] episodes_seen=2010 last_return=-73.1 (+1 eps) [worker 0] episodes_seen=2010 last_return=-37.7 (+1 eps) [worker 3] 
episodes_seen=2020 last_return=-33.1 (+1 eps) [worker 2] episodes_seen=2030 last_return=-57.3 (+1 eps) [worker 1] episodes_seen=2020 last_return=-111.4 (+1 eps) [worker 0] episodes_seen=2020 last_return=-187.7 (+1 eps) [worker 3] episodes_seen=2030 last_return=-222.8 (+1 eps) [worker 2] episodes_seen=2040 last_return=-159.4 (+1 eps) [worker 1] episodes_seen=2030 last_return=-58.1 (+1 eps) [worker 0] episodes_seen=2030 last_return=-92.0 (+1 eps) [worker 3] episodes_seen=2040 last_return=-42.1 (+1 eps) [worker 1] episodes_seen=2040 last_return=-73.2 (+1 eps) [worker 2] episodes_seen=2050 last_return=-76.8 (+1 eps) [worker 0] episodes_seen=2040 last_return=25.7 (+1 eps) [A2C][sync] it= 6256 steps= 750720 (+120) avg10= -87.82 loss=822.092 pg=0.080 vf=1494.608 H=1.120 gn=2599.072 [worker 1] episodes_seen=2050 last_return=-45.2 (+1 eps) [worker 2] episodes_seen=2060 last_return=-25.6 (+1 eps) [worker 3] episodes_seen=2050 last_return=-70.9 (+1 eps) [worker 0] episodes_seen=2050 last_return=-140.4 (+1 eps) [worker 1] episodes_seen=2060 last_return=-198.1 (+1 eps) [worker 2] episodes_seen=2070 last_return=-108.3 (+1 eps) [worker 0] episodes_seen=2060 last_return=-83.7 (+1 eps) [worker 1] episodes_seen=2070 last_return=-84.4 (+1 eps) [worker 3] episodes_seen=2060 last_return=-56.9 (+1 eps) [worker 0] episodes_seen=2070 last_return=-87.8 (+1 eps) [worker 2] episodes_seen=2080 last_return=-76.0 (+1 eps) [worker 1] episodes_seen=2080 last_return=-52.1 (+1 eps) [worker 3] episodes_seen=2070 last_return=-78.7 (+1 eps) [worker 0] episodes_seen=2080 last_return=-77.3 (+1 eps) [worker 2] episodes_seen=2090 last_return=-85.4 (+1 eps) [worker 1] episodes_seen=2090 last_return=-80.3 (+1 eps) [worker 3] episodes_seen=2080 last_return=-67.1 (+1 eps) [worker 0] episodes_seen=2090 last_return=-120.0 (+1 eps) [worker 3] episodes_seen=2090 last_return=-139.0 (+1 eps) [worker 1] episodes_seen=2100 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=2100 last_return=-127.8 (+1 eps) [worker 
0] episodes_seen=2100 last_return=-36.9 (+1 eps) [worker 3] episodes_seen=2100 last_return=-79.0 (+1 eps) [worker 2] episodes_seen=2110 last_return=-85.0 (+1 eps) [worker 0] episodes_seen=2110 last_return=-92.0 (+1 eps) [worker 1] episodes_seen=2110 last_return=-39.9 (+1 eps) [worker 3] episodes_seen=2110 last_return=-51.2 (+1 eps) [worker 0] episodes_seen=2120 last_return=-61.8 (+1 eps) [worker 2] episodes_seen=2120 last_return=-224.6 (+1 eps) [worker 1] episodes_seen=2120 last_return=-60.5 (+1 eps) [worker 3] episodes_seen=2120 last_return=-99.7 (+1 eps) [worker 0] episodes_seen=2130 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=2130 last_return=-50.0 (+1 eps) [worker 1] episodes_seen=2130 last_return=-53.7 (+1 eps) [A2C][sync] it= 6673 steps= 800760 (+120) avg10= -65.32 loss=404.867 pg=-0.125 vf=736.395 H=1.243 gn=1030.766 [worker 3] episodes_seen=2130 last_return=-36.5 (+1 eps) [worker 0] episodes_seen=2140 last_return=-75.4 (+1 eps) [worker 2] episodes_seen=2140 last_return=-110.5 (+1 eps) [worker 1] episodes_seen=2140 last_return=-80.6 (+1 eps) [worker 3] episodes_seen=2140 last_return=-38.7 (+1 eps) [worker 2] episodes_seen=2150 last_return=-31.7 (+1 eps) [worker 1] episodes_seen=2150 last_return=-14.4 (+1 eps) [worker 0] episodes_seen=2150 last_return=-36.0 (+1 eps) [worker 2] episodes_seen=2160 last_return=-36.0 (+1 eps) [worker 3] episodes_seen=2150 last_return=-65.6 (+1 eps) [worker 1] episodes_seen=2160 last_return=-49.9 (+1 eps) [worker 0] episodes_seen=2160 last_return=-150.9 (+1 eps) [worker 2] episodes_seen=2170 last_return=-148.4 (+1 eps) [worker 3] episodes_seen=2160 last_return=-133.3 (+1 eps) [worker 1] episodes_seen=2170 last_return=-78.0 (+1 eps) [worker 0] episodes_seen=2170 last_return=-77.2 (+1 eps) [worker 1] episodes_seen=2180 last_return=-76.0 (+1 eps) [worker 2] episodes_seen=2180 last_return=23.0 (+1 eps) [worker 0] episodes_seen=2180 last_return=-69.0 (+1 eps) [worker 3] episodes_seen=2170 last_return=-49.1 (+1 eps) [worker 1] 
episodes_seen=2190 last_return=-62.1 (+1 eps) [worker 2] episodes_seen=2190 last_return=-62.4 (+1 eps) [worker 3] episodes_seen=2180 last_return=-199.6 (+1 eps) [worker 0] episodes_seen=2190 last_return=-71.0 (+1 eps) [worker 1] episodes_seen=2200 last_return=-68.6 (+1 eps) [worker 2] episodes_seen=2200 last_return=-52.2 (+1 eps) [worker 3] episodes_seen=2190 last_return=-49.2 (+1 eps) [worker 0] episodes_seen=2200 last_return=-67.0 (+1 eps) [worker 2] episodes_seen=2210 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2210 last_return=-87.9 (+1 eps) [A2C][sync] it= 7090 steps= 850800 (+120) avg10= -84.25 loss=181.074 pg=-0.034 vf=329.336 H=1.301 gn=931.526 [worker 3] episodes_seen=2200 last_return=-75.9 (+1 eps) [worker 2] episodes_seen=2220 last_return=-54.7 (+1 eps) [worker 0] episodes_seen=2210 last_return=-75.0 (+1 eps) [worker 1] episodes_seen=2220 last_return=-99.2 (+1 eps) [worker 3] episodes_seen=2210 last_return=-63.1 (+1 eps) [worker 0] episodes_seen=2220 last_return=-30.2 (+1 eps) [worker 2] episodes_seen=2230 last_return=-90.0 (+1 eps) [worker 1] episodes_seen=2230 last_return=-40.2 (+1 eps) [worker 3] episodes_seen=2220 last_return=-113.1 (+1 eps) [worker 2] episodes_seen=2240 last_return=-101.0 (+1 eps) [worker 0] episodes_seen=2230 last_return=-96.4 (+1 eps) [worker 3] episodes_seen=2230 last_return=-7.2 (+1 eps) [worker 2] episodes_seen=2250 last_return=-60.7 (+1 eps) [worker 3] episodes_seen=2240 last_return=-98.0 (+1 eps) [worker 1] episodes_seen=2240 last_return=-77.2 (+1 eps) [worker 2] episodes_seen=2260 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2240 last_return=-112.7 (+1 eps) [worker 3] episodes_seen=2250 last_return=-34.7 (+1 eps) [worker 1] episodes_seen=2250 last_return=-23.6 (+1 eps) [worker 2] episodes_seen=2270 last_return=-102.8 (+1 eps) [worker 3] episodes_seen=2260 last_return=-66.0 (+1 eps) [worker 1] episodes_seen=2260 last_return=-53.1 (+1 eps) [worker 0] episodes_seen=2250 last_return=-179.3 (+1 eps) [worker 2] 
episodes_seen=2280 last_return=-161.2 (+1 eps) [worker 0] episodes_seen=2260 last_return=-82.5 (+1 eps) [worker 1] episodes_seen=2270 last_return=-79.9 (+1 eps) [worker 3] episodes_seen=2270 last_return=-43.7 (+1 eps) [A2C][sync] it= 7507 steps= 900840 (+120) avg10= -67.36 loss=61.005 pg=-0.147 vf=111.227 H=1.119 gn=445.873 [worker 2] episodes_seen=2290 last_return=-13.8 (+1 eps) [worker 3] episodes_seen=2280 last_return=-87.1 (+1 eps) [worker 0] episodes_seen=2270 last_return=-51.8 (+1 eps) [worker 1] episodes_seen=2280 last_return=-23.5 (+1 eps) [worker 2] episodes_seen=2300 last_return=-44.4 (+1 eps) [worker 1] episodes_seen=2290 last_return=-105.7 (+1 eps) [worker 0] episodes_seen=2280 last_return=-35.9 (+1 eps) [worker 3] episodes_seen=2290 last_return=-61.8 (+1 eps) [worker 1] episodes_seen=2300 last_return=-47.9 (+1 eps) [worker 2] episodes_seen=2310 last_return=-70.6 (+1 eps) [worker 3] episodes_seen=2300 last_return=-38.6 (+1 eps) [worker 0] episodes_seen=2290 last_return=-79.6 (+1 eps) [worker 1] episodes_seen=2310 last_return=-50.5 (+1 eps) [worker 2] episodes_seen=2320 last_return=-84.4 (+1 eps) [worker 3] episodes_seen=2310 last_return=-64.9 (+1 eps) [worker 0] episodes_seen=2300 last_return=-112.0 (+1 eps) [worker 1] episodes_seen=2320 last_return=-58.0 (+1 eps) [worker 2] episodes_seen=2330 last_return=-39.9 (+1 eps) [worker 3] episodes_seen=2320 last_return=-49.8 (+1 eps) [A2C][sync] it= 7924 steps= 950880 (+120) avg10= -91.15 loss=104.591 pg=0.094 vf=190.029 H=0.973 gn=790.975 [worker 0] episodes_seen=2310 last_return=-61.7 (+1 eps) [worker 2] episodes_seen=2340 last_return=-58.9 (+1 eps) [worker 3] episodes_seen=2330 last_return=-107.7 (+1 eps) [worker 1] episodes_seen=2330 last_return=-102.9 (+1 eps) [worker 0] episodes_seen=2320 last_return=-175.7 (+1 eps) [worker 2] episodes_seen=2350 last_return=-242.0 (+1 eps) [worker 3] episodes_seen=2340 last_return=-319.9 (+1 eps) [worker 1] episodes_seen=2340 last_return=-113.9 (+1 eps) [worker 0] 
episodes_seen=2330 last_return=-396.7 (+1 eps) [worker 2] episodes_seen=2360 last_return=-126.3 (+1 eps) [worker 3] episodes_seen=2350 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2350 last_return=-305.4 (+1 eps) [worker 0] episodes_seen=2340 last_return=-247.2 (+1 eps) [worker 2] episodes_seen=2370 last_return=-285.2 (+1 eps) [worker 3] episodes_seen=2360 last_return=-219.7 (+1 eps) [worker 1] episodes_seen=2360 last_return=-113.7 (+1 eps) [worker 0] episodes_seen=2350 last_return=-274.3 (+1 eps) [worker 2] episodes_seen=2380 last_return=-220.9 (+1 eps) [worker 3] episodes_seen=2370 last_return=-192.8 (+1 eps) [worker 1] episodes_seen=2370 last_return=-196.6 (+1 eps) [worker 0] episodes_seen=2360 last_return=-56.8 (+1 eps) [worker 2] episodes_seen=2390 last_return=-50.9 (+1 eps) [worker 3] episodes_seen=2380 last_return=-73.3 (+1 eps) [worker 1] episodes_seen=2380 last_return=-63.8 (+1 eps) [worker 0] episodes_seen=2370 last_return=-86.3 (+1 eps) [worker 2] episodes_seen=2400 last_return=-108.0 (+1 eps) [worker 1] episodes_seen=2390 last_return=-39.9 (+1 eps) [worker 0] episodes_seen=2380 last_return=-118.7 (+1 eps) [worker 3] episodes_seen=2390 last_return=-102.6 (+1 eps) [worker 2] episodes_seen=2410 last_return=-31.6 (+1 eps) [worker 0] episodes_seen=2390 last_return=-131.3 (+1 eps) [worker 1] episodes_seen=2400 last_return=-127.5 (+1 eps) [worker 3] episodes_seen=2400 last_return=-104.5 (+1 eps) [worker 2] episodes_seen=2420 last_return=-80.9 (+1 eps) [worker 0] episodes_seen=2400 last_return=-75.8 (+1 eps) [worker 1] episodes_seen=2410 last_return=-48.2 (+1 eps) [worker 2] episodes_seen=2430 last_return=-44.7 (+1 eps) [A2C][sync] it= 8341 steps= 1000920 (+120) avg10= -62.64 loss=46.198 pg=-0.051 vf=84.136 H=1.273 gn=258.444 [worker 3] episodes_seen=2410 last_return=-79.5 (+1 eps) [worker 0] episodes_seen=2410 last_return=-26.4 (+1 eps) [worker 1] episodes_seen=2420 last_return=-81.1 (+1 eps) [worker 3] episodes_seen=2420 last_return=-65.8 (+1 eps) 
[worker 2] episodes_seen=2440 last_return=-82.6 (+1 eps) [worker 1] episodes_seen=2430 last_return=-85.3 (+1 eps) [worker 0] episodes_seen=2420 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=2430 last_return=-81.9 (+1 eps) [worker 2] episodes_seen=2450 last_return=-0.2 (+1 eps) [worker 3] episodes_seen=2440 last_return=-62.8 (+1 eps) [worker 0] episodes_seen=2430 last_return=-58.2 (+1 eps) [worker 1] episodes_seen=2440 last_return=-78.7 (+1 eps) [worker 2] episodes_seen=2460 last_return=-97.7 (+1 eps) [worker 1] episodes_seen=2450 last_return=-37.5 (+1 eps) [worker 3] episodes_seen=2450 last_return=-50.2 (+1 eps) [worker 0] episodes_seen=2440 last_return=-112.7 (+1 eps) [worker 1] episodes_seen=2460 last_return=-34.3 (+1 eps) [worker 3] episodes_seen=2460 last_return=-87.2 (+1 eps) [worker 2] episodes_seen=2470 last_return=-69.6 (+1 eps) [worker 0] episodes_seen=2450 last_return=-87.6 (+1 eps) [worker 3] episodes_seen=2470 last_return=-58.7 (+1 eps) [worker 1] episodes_seen=2470 last_return=-70.3 (+1 eps) [A2C][sync] it= 8758 steps= 1050960 (+120) avg10= -63.83 loss=184.766 pg=-0.032 vf=336.041 H=1.224 gn=1141.857 [worker 2] episodes_seen=2480 last_return=-212.4 (+1 eps) [worker 1] episodes_seen=2480 last_return=-75.0 (+1 eps) [worker 3] episodes_seen=2480 last_return=-73.5 (+1 eps) [worker 0] episodes_seen=2460 last_return=-65.9 (+1 eps) [worker 2] episodes_seen=2490 last_return=-44.7 (+1 eps) [worker 1] episodes_seen=2490 last_return=-36.8 (+1 eps) [worker 3] episodes_seen=2490 last_return=-93.2 (+1 eps) [worker 2] episodes_seen=2500 last_return=-74.6 (+1 eps) [worker 0] episodes_seen=2470 last_return=-14.7 (+1 eps) [worker 1] episodes_seen=2500 last_return=-74.7 (+1 eps) [worker 3] episodes_seen=2500 last_return=-119.4 (+1 eps) [worker 0] episodes_seen=2480 last_return=-33.8 (+1 eps) [worker 1] episodes_seen=2510 last_return=-97.4 (+1 eps) [worker 3] episodes_seen=2510 last_return=-10.3 (+1 eps) [worker 2] episodes_seen=2510 last_return=3.6 (+1 eps) 
[worker 1] episodes_seen=2520 last_return=-37.7 (+1 eps) [worker 3] episodes_seen=2520 last_return=-25.1 (+1 eps) [worker 2] episodes_seen=2520 last_return=-46.8 (+1 eps) [worker 0] episodes_seen=2490 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2530 last_return=-47.8 (+1 eps) [A2C][sync] it= 9175 steps= 1101000 (+120) avg10= -51.26 loss=83.220 pg=-0.005 vf=151.358 H=1.095 gn=786.347 [worker 2] episodes_seen=2530 last_return=-92.6 (+1 eps) [worker 0] episodes_seen=2500 last_return=-74.9 (+1 eps) [worker 3] episodes_seen=2530 last_return=0.1 (+1 eps) [worker 1] episodes_seen=2540 last_return=-36.5 (+1 eps) [worker 2] episodes_seen=2540 last_return=-93.9 (+1 eps) [worker 0] episodes_seen=2510 last_return=-34.4 (+1 eps) [worker 1] episodes_seen=2550 last_return=-47.1 (+1 eps) [worker 3] episodes_seen=2540 last_return=-79.9 (+1 eps) [worker 2] episodes_seen=2550 last_return=-52.7 (+1 eps) [worker 0] episodes_seen=2520 last_return=-61.1 (+1 eps) [worker 3] episodes_seen=2550 last_return=-59.3 (+1 eps) [worker 1] episodes_seen=2560 last_return=-40.6 (+1 eps) [worker 2] episodes_seen=2560 last_return=-56.2 (+1 eps) [worker 0] episodes_seen=2530 last_return=-85.3 (+1 eps) [worker 2] episodes_seen=2570 last_return=-5.9 (+1 eps) [worker 3] episodes_seen=2560 last_return=-108.4 (+1 eps) [worker 1] episodes_seen=2570 last_return=-70.2 (+1 eps) [worker 0] episodes_seen=2540 last_return=-48.4 (+1 eps) [A2C][sync] it= 9592 steps= 1151040 (+120) avg10= -78.15 loss=67.669 pg=-0.103 vf=123.267 H=1.236 gn=462.165 [worker 3] episodes_seen=2570 last_return=-98.3 (+1 eps) [worker 0] episodes_seen=2550 last_return=-50.8 (+1 eps) [worker 1] episodes_seen=2580 last_return=-0.7 (+1 eps) [worker 2] episodes_seen=2580 last_return=-107.8 (+1 eps) [worker 0] episodes_seen=2560 last_return=-42.4 (+1 eps) [worker 1] episodes_seen=2590 last_return=-86.9 (+1 eps) [worker 3] episodes_seen=2580 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=2590 last_return=-83.3 (+1 eps) [worker 2] 
episodes_seen=2600 last_return=-33.1 (+1 eps) [worker 0] episodes_seen=2570 last_return=-113.6 (+1 eps) [worker 1] episodes_seen=2600 last_return=2.8 (+1 eps) [worker 3] episodes_seen=2590 last_return=-58.1 (+1 eps) [worker 0] episodes_seen=2580 last_return=-66.9 (+1 eps) [worker 2] episodes_seen=2610 last_return=-40.5 (+1 eps) [worker 1] episodes_seen=2610 last_return=-64.6 (+1 eps) [worker 2] episodes_seen=2620 last_return=-70.7 (+1 eps) [worker 0] episodes_seen=2590 last_return=-101.0 (+1 eps) [A2C][sync] it=10000 steps= 1200000 (+120) avg10= -71.67 loss=57.200 pg=0.005 vf=104.028 H=1.035 gn=678.227 [Checkpoint] Saved self-describing checkpoint → part2_artifacts/checkpoints/a2c_run9_seed1227.pth [A2C][sync] done: steps=1200000 time=1112.6s avg10=-71.67
[Run run9_seed1227] checkpoint: part2_artifacts/checkpoints/a2c_run9_seed1227.pth [Run run9_seed1227] training plot (tail 500): part2_artifacts/train_curve_run9_seed1227.png [Run run9_seed1227] training plot (full): part2_artifacts/train_curve_full_run9_seed1227.png [Run run9_seed1227] per-worker plot: part2_artifacts/train_curve_workers_run9_seed1227.png [Run run9_seed1227] workers-average plot: part2_artifacts/train_curve_workers_avg_run9_seed1227.png
[Eval run9_seed1227] mean=-679.23 std=24.51 min=-717.69 max=-643.93 [Eval run9_seed1227] CSV: part2_artifacts/eval10_run9_seed1227.csv [Eval run9_seed1227] plot: part2_artifacts/eval10_run9_seed1227.png [Best] ep=7 return=-643.93 seed=1234
/usr/local/lib/python3.12/dist-packages/gymnasium/wrappers/rendering.py:293: UserWarning: WARN: Overwriting existing videos at /content/part2_artifacts/videos/run9_seed1227 folder (try specifying a different `video_folder` for the `RecordVideo` wrapper if this is not desired)
logger.warn(
/usr/local/lib/python3.12/dist-packages/moviepy/config_defaults.py:47: SyntaxWarning: invalid escape sequence '\P'
IMAGEMAGICK_BINARY = r"C:\Program Files\ImageMagick-6.8.8-Q16\magick.exe"
[Video run9_seed1227] episode return=-643.93 [Video run9_seed1227] saved under: part2_artifacts/videos run9_seed1227 | mean=-679.2±24.5 | best_ep=7, best_ret=-643.9
Run#10
# Run 10: identifier used to tag every artifact produced by this run.
run_id = f"run10_seed{SEED}"

# All training hyperparameters for this run, kept together so the
# configuration can be read (and diffed against other runs) at a glance.
_run10_train_kwargs = {
    "n_workers": 4,
    "total_env_steps": 1_400_000,
    "T": 30,  # with 4 workers the log reports +120 env steps per sync iteration
    "gamma": 0.99,
    "entropy_coef": 0.015,
    "value_coef": 0.60,
    "max_grad_norm": 0.5,
    "lr": 3e-4,
    "log_every": 50_000,
}

# Step 1: multi-worker training; saves the checkpoint and training plots.
model, logs, paths = train_once(run_id=run_id, **_run10_train_kwargs)

# Step 2: greedy evaluation over 10 fixed-seed episodes from the saved checkpoint.
metrics, eval_paths = evaluate_10(run_id, paths.ckpt_path)

# Step 3: re-record the best of those evaluation episodes, replayed with its seed.
video_dir, best_idx, best_ret = record_best_from_eval(run_id, paths.ckpt_path)

# One-line summary of the run: evaluation mean ± std and the best episode.
print(f"{run_id} | mean={metrics['mean']:.1f}±{metrics['std']:.1f} | best_ep={best_idx}, best_ret={best_ret:.1f}")
[Run run10_seed1227] starting training… [A2C][sync] start: workers=4, T=30, target_steps=1400000, mp=fork [A2C][sync] it= 1 steps= 120 (+120) avg10= nan loss=178.923 pg=0.000 vf=298.240 H=1.386 gn=46.828 [worker 0] episodes_seen=10 last_return=-236.0 (+1 eps) [worker 1] episodes_seen=10 last_return=-192.9 (+1 eps) [worker 2] episodes_seen=10 last_return=-168.7 (+1 eps) [worker 3] episodes_seen=10 last_return=-71.6 (+1 eps) [worker 0] episodes_seen=20 last_return=-93.2 (+1 eps) [worker 1] episodes_seen=20 last_return=-28.2 (+1 eps) [worker 2] episodes_seen=20 last_return=-67.6 (+1 eps) [worker 3] episodes_seen=20 last_return=-124.8 (+1 eps) [worker 0] episodes_seen=30 last_return=-131.3 (+1 eps) [worker 2] episodes_seen=30 last_return=-103.2 (+1 eps) [worker 3] episodes_seen=30 last_return=-70.3 (+1 eps) [worker 1] episodes_seen=30 last_return=24.5 (+1 eps) [worker 0] episodes_seen=40 last_return=-188.7 (+1 eps) [worker 2] episodes_seen=40 last_return=-177.1 (+1 eps) [worker 1] episodes_seen=40 last_return=-146.5 (+1 eps) [worker 3] episodes_seen=40 last_return=-176.0 (+1 eps) [worker 0] episodes_seen=50 last_return=-226.8 (+1 eps) [worker 1] episodes_seen=50 last_return=-324.3 (+1 eps) [worker 2] episodes_seen=50 last_return=-224.0 (+1 eps) [worker 3] episodes_seen=50 last_return=-339.8 (+1 eps) [worker 0] episodes_seen=60 last_return=-234.4 (+1 eps) [worker 2] episodes_seen=60 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=60 last_return=-157.4 (+1 eps) [worker 3] episodes_seen=60 last_return=-170.1 (+1 eps) [worker 0] episodes_seen=70 last_return=-140.8 (+1 eps) [worker 2] episodes_seen=70 last_return=-225.8 (+1 eps) [worker 3] episodes_seen=70 last_return=-304.1 (+1 eps) [worker 1] episodes_seen=70 last_return=-307.2 (+1 eps) [worker 0] episodes_seen=80 last_return=-295.1 (+1 eps) [worker 3] episodes_seen=80 last_return=-207.5 (+1 eps) [worker 2] episodes_seen=80 last_return=-113.6 (+1 eps) [worker 1] episodes_seen=80 last_return=-263.0 (+1 eps) [worker 0] 
episodes_seen=90 last_return=-223.3 (+1 eps) [worker 2] episodes_seen=90 last_return=-191.3 (+1 eps) [worker 3] episodes_seen=90 last_return=-253.9 (+1 eps) [worker 1] episodes_seen=90 last_return=-107.3 (+1 eps) [worker 0] episodes_seen=100 last_return=-274.4 (+1 eps) [worker 1] episodes_seen=100 last_return=-172.1 (+1 eps) [worker 2] episodes_seen=100 last_return=-221.2 (+1 eps) [worker 3] episodes_seen=100 last_return=-295.6 (+1 eps) [worker 0] episodes_seen=110 last_return=-204.5 (+1 eps) [worker 1] episodes_seen=110 last_return=-291.5 (+1 eps) [worker 2] episodes_seen=110 last_return=-318.6 (+1 eps) [worker 3] episodes_seen=110 last_return=-307.1 (+1 eps) [worker 0] episodes_seen=120 last_return=-378.2 (+1 eps) [worker 1] episodes_seen=120 last_return=-379.0 (+1 eps) [worker 2] episodes_seen=120 last_return=-186.1 (+1 eps) [worker 3] episodes_seen=120 last_return=-193.2 (+1 eps) [worker 0] episodes_seen=130 last_return=-302.8 (+1 eps) [worker 1] episodes_seen=130 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=130 last_return=-298.2 (+1 eps) [worker 3] episodes_seen=130 last_return=-268.5 (+1 eps) [A2C][sync] it= 418 steps= 50160 (+120) avg10=-219.21 loss=14732.938 pg=-0.003 vf=24554.916 H=0.641 gn=15661.403 [worker 0] episodes_seen=140 last_return=-213.8 (+1 eps) [worker 1] episodes_seen=140 last_return=-189.6 (+1 eps) [worker 2] episodes_seen=140 last_return=-259.8 (+1 eps) [worker 3] episodes_seen=140 last_return=-37.5 (+1 eps) [worker 0] episodes_seen=150 last_return=-157.4 (+1 eps) [worker 1] episodes_seen=150 last_return=-366.9 (+1 eps) [worker 2] episodes_seen=150 last_return=-268.1 (+1 eps) [worker 3] episodes_seen=150 last_return=-305.2 (+1 eps) [worker 0] episodes_seen=160 last_return=-336.0 (+1 eps) [worker 1] episodes_seen=160 last_return=-169.3 (+1 eps) [worker 2] episodes_seen=160 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=160 last_return=-232.0 (+1 eps) [worker 0] episodes_seen=170 last_return=-261.1 (+1 eps) [worker 1] 
episodes_seen=170 last_return=-203.5 (+1 eps) [worker 2] episodes_seen=170 last_return=-345.8 (+1 eps) [worker 3] episodes_seen=170 last_return=-170.3 (+1 eps) [worker 0] episodes_seen=180 last_return=-435.9 (+1 eps) [worker 1] episodes_seen=180 last_return=-257.3 (+1 eps) [worker 2] episodes_seen=180 last_return=-243.3 (+1 eps) [worker 3] episodes_seen=180 last_return=-164.5 (+1 eps) [worker 0] episodes_seen=190 last_return=-341.4 (+1 eps) [worker 1] episodes_seen=190 last_return=-246.9 (+1 eps) [worker 2] episodes_seen=190 last_return=-127.2 (+1 eps) [worker 3] episodes_seen=190 last_return=-294.0 (+1 eps) [worker 0] episodes_seen=200 last_return=-219.5 (+1 eps) [worker 1] episodes_seen=200 last_return=-433.6 (+1 eps) [worker 2] episodes_seen=200 last_return=-130.3 (+1 eps) [worker 3] episodes_seen=200 last_return=-246.2 (+1 eps) [worker 0] episodes_seen=210 last_return=-222.7 (+1 eps) [worker 1] episodes_seen=210 last_return=-256.2 (+1 eps) [worker 2] episodes_seen=210 last_return=-269.4 (+1 eps) [worker 3] episodes_seen=210 last_return=-9.5 (+1 eps) [worker 0] episodes_seen=220 last_return=-352.0 (+1 eps) [worker 2] episodes_seen=220 last_return=-364.3 (+1 eps) [worker 1] episodes_seen=220 last_return=-453.7 (+1 eps) [worker 3] episodes_seen=220 last_return=-104.3 (+1 eps) [worker 0] episodes_seen=230 last_return=-171.7 (+1 eps) [worker 1] episodes_seen=230 last_return=-197.7 (+1 eps) [worker 2] episodes_seen=230 last_return=-150.1 (+1 eps) [worker 3] episodes_seen=230 last_return=-316.7 (+1 eps) [worker 0] episodes_seen=240 last_return=-352.1 (+1 eps) [worker 1] episodes_seen=240 last_return=-461.4 (+1 eps) [worker 2] episodes_seen=240 last_return=-361.5 (+1 eps) [worker 3] episodes_seen=240 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=250 last_return=-249.3 (+1 eps) [worker 1] episodes_seen=250 last_return=-281.9 (+1 eps) [worker 2] episodes_seen=250 last_return=-21.6 (+1 eps) [worker 3] episodes_seen=250 last_return=-108.8 (+1 eps) [worker 0] 
episodes_seen=260 last_return=-103.6 (+1 eps) [worker 1] episodes_seen=260 last_return=-83.9 (+1 eps) [worker 2] episodes_seen=260 last_return=-82.2 (+1 eps) [worker 3] episodes_seen=260 last_return=-109.8 (+1 eps) [worker 1] episodes_seen=270 last_return=-89.3 (+1 eps) [worker 2] episodes_seen=270 last_return=-82.1 (+1 eps) [worker 3] episodes_seen=270 last_return=-81.0 (+1 eps) [worker 0] episodes_seen=270 last_return=-181.2 (+1 eps) [worker 1] episodes_seen=280 last_return=-258.1 (+1 eps) [worker 2] episodes_seen=280 last_return=-122.2 (+1 eps) [worker 3] episodes_seen=280 last_return=-201.5 (+1 eps) [worker 0] episodes_seen=280 last_return=33.5 (+1 eps) [worker 1] episodes_seen=290 last_return=-82.8 (+1 eps) [worker 2] episodes_seen=290 last_return=-85.6 (+1 eps) [worker 3] episodes_seen=290 last_return=-91.3 (+1 eps) [worker 0] episodes_seen=290 last_return=-73.7 (+1 eps) [worker 1] episodes_seen=300 last_return=-66.4 (+1 eps) [worker 2] episodes_seen=300 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=300 last_return=-98.3 (+1 eps) [A2C][sync] it= 835 steps= 100200 (+120) avg10= -93.64 loss=1846.304 pg=-0.002 vf=3077.196 H=0.744 gn=4800.085 [worker 0] episodes_seen=300 last_return=-198.6 (+1 eps) [worker 1] episodes_seen=310 last_return=-218.1 (+1 eps) [worker 2] episodes_seen=310 last_return=-85.3 (+1 eps) [worker 3] episodes_seen=310 last_return=-100.3 (+1 eps) [worker 0] episodes_seen=310 last_return=-132.2 (+1 eps) [worker 1] episodes_seen=320 last_return=-131.3 (+1 eps) [worker 2] episodes_seen=320 last_return=-95.7 (+1 eps) [worker 3] episodes_seen=320 last_return=-85.5 (+1 eps) [worker 0] episodes_seen=320 last_return=-105.6 (+1 eps) [worker 1] episodes_seen=330 last_return=-86.0 (+1 eps) [worker 2] episodes_seen=330 last_return=-62.6 (+1 eps) [worker 3] episodes_seen=330 last_return=-68.5 (+1 eps) [worker 0] episodes_seen=330 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=340 last_return=-86.4 (+1 eps) [worker 1] episodes_seen=340 
last_return=-76.4 (+1 eps) [worker 3] episodes_seen=340 last_return=-109.1 (+1 eps) [worker 0] episodes_seen=340 last_return=-307.0 (+1 eps) [worker 1] episodes_seen=350 last_return=-250.6 (+1 eps) [worker 2] episodes_seen=350 last_return=-164.9 (+1 eps) [worker 3] episodes_seen=350 last_return=-282.6 (+1 eps) [worker 0] episodes_seen=350 last_return=-175.7 (+1 eps) [worker 1] episodes_seen=360 last_return=-99.9 (+1 eps) [worker 2] episodes_seen=360 last_return=-210.0 (+1 eps) [worker 3] episodes_seen=360 last_return=-114.8 (+1 eps) [worker 0] episodes_seen=360 last_return=-250.2 (+1 eps) [worker 2] episodes_seen=370 last_return=-325.7 (+1 eps) [worker 3] episodes_seen=370 last_return=-111.7 (+1 eps) [worker 1] episodes_seen=370 last_return=-251.1 (+1 eps) [worker 0] episodes_seen=370 last_return=-182.4 (+1 eps) [worker 1] episodes_seen=380 last_return=-303.0 (+1 eps) [worker 3] episodes_seen=380 last_return=-141.5 (+1 eps) [worker 2] episodes_seen=380 last_return=-192.8 (+1 eps) [worker 0] episodes_seen=380 last_return=-176.6 (+1 eps) [worker 3] episodes_seen=390 last_return=-222.1 (+1 eps) [worker 1] episodes_seen=390 last_return=-172.5 (+1 eps) [worker 2] episodes_seen=390 last_return=-335.4 (+1 eps) [worker 0] episodes_seen=390 last_return=-396.4 (+1 eps) [worker 1] episodes_seen=400 last_return=-427.8 (+1 eps) [worker 3] episodes_seen=400 last_return=-291.2 (+1 eps) [worker 2] episodes_seen=400 last_return=-467.7 (+1 eps) [worker 0] episodes_seen=400 last_return=-384.9 (+1 eps) [worker 1] episodes_seen=410 last_return=-158.1 (+1 eps) [worker 3] episodes_seen=410 last_return=-141.0 (+1 eps) [worker 2] episodes_seen=410 last_return=-112.1 (+1 eps) [worker 3] episodes_seen=420 last_return=-324.4 (+1 eps) [worker 0] episodes_seen=410 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=420 last_return=-185.5 (+1 eps) [worker 2] episodes_seen=420 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=430 last_return=-336.6 (+1 eps) [worker 0] episodes_seen=420 
last_return=-315.1 (+1 eps) [worker 3] episodes_seen=430 last_return=-112.6 (+1 eps) [worker 2] episodes_seen=430 last_return=-120.1 (+1 eps) [worker 0] episodes_seen=430 last_return=-289.9 (+1 eps) [worker 3] episodes_seen=440 last_return=-362.2 (+1 eps) [worker 1] episodes_seen=440 last_return=-396.8 (+1 eps) [worker 2] episodes_seen=440 last_return=-230.6 (+1 eps) [A2C][sync] it= 1252 steps= 150240 (+120) avg10=-246.24 loss=3018.892 pg=0.074 vf=5031.380 H=0.683 gn=14363.581 [worker 0] episodes_seen=440 last_return=-271.7 (+1 eps) [worker 1] episodes_seen=450 last_return=-227.5 (+1 eps) [worker 2] episodes_seen=450 last_return=-265.1 (+1 eps) [worker 3] episodes_seen=450 last_return=-123.1 (+1 eps) [worker 0] episodes_seen=450 last_return=-246.6 (+1 eps) [worker 1] episodes_seen=460 last_return=-273.5 (+1 eps) [worker 3] episodes_seen=460 last_return=-187.3 (+1 eps) [worker 2] episodes_seen=460 last_return=-496.2 (+1 eps) [worker 0] episodes_seen=460 last_return=-157.5 (+1 eps) [worker 3] episodes_seen=470 last_return=-118.8 (+1 eps) [worker 1] episodes_seen=470 last_return=-239.3 (+1 eps) [worker 2] episodes_seen=470 last_return=-277.8 (+1 eps) [worker 0] episodes_seen=470 last_return=-117.4 (+1 eps) [worker 1] episodes_seen=480 last_return=-263.6 (+1 eps) [worker 2] episodes_seen=480 last_return=-244.8 (+1 eps) [worker 3] episodes_seen=480 last_return=-177.2 (+1 eps) [worker 0] episodes_seen=480 last_return=-183.7 (+1 eps) [worker 1] episodes_seen=490 last_return=-222.4 (+1 eps) [worker 3] episodes_seen=490 last_return=-287.5 (+1 eps) [worker 2] episodes_seen=490 last_return=-166.6 (+1 eps) [worker 0] episodes_seen=490 last_return=-231.2 (+1 eps) [worker 1] episodes_seen=500 last_return=-123.2 (+1 eps) [worker 2] episodes_seen=500 last_return=-406.3 (+1 eps) [worker 3] episodes_seen=500 last_return=-277.0 (+1 eps) [worker 0] episodes_seen=500 last_return=-226.1 (+1 eps) [worker 1] episodes_seen=510 last_return=-98.3 (+1 eps) [worker 2] episodes_seen=510 
last_return=-190.9 (+1 eps) [worker 3] episodes_seen=510 last_return=-426.8 (+1 eps) [worker 0] episodes_seen=510 last_return=-63.9 (+1 eps) [worker 1] episodes_seen=520 last_return=-97.9 (+1 eps) [worker 2] episodes_seen=520 last_return=-98.1 (+1 eps) [worker 3] episodes_seen=520 last_return=-101.8 (+1 eps) [worker 0] episodes_seen=520 last_return=-74.2 (+1 eps) [worker 3] episodes_seen=530 last_return=-83.9 (+1 eps) [worker 2] episodes_seen=530 last_return=-79.2 (+1 eps) [worker 1] episodes_seen=530 last_return=-77.5 (+1 eps) [worker 0] episodes_seen=530 last_return=-90.0 (+1 eps) [worker 3] episodes_seen=540 last_return=-197.4 (+1 eps) [worker 1] episodes_seen=540 last_return=-47.2 (+1 eps) [worker 2] episodes_seen=540 last_return=-87.0 (+1 eps) [worker 0] episodes_seen=540 last_return=-96.3 (+1 eps) [worker 1] episodes_seen=550 last_return=-92.2 (+1 eps) [worker 3] episodes_seen=550 last_return=-113.2 (+1 eps) [worker 2] episodes_seen=550 last_return=-145.2 (+1 eps) [worker 0] episodes_seen=550 last_return=-214.7 (+1 eps) [worker 1] episodes_seen=560 last_return=-88.7 (+1 eps) [worker 2] episodes_seen=560 last_return=-90.8 (+1 eps) [worker 3] episodes_seen=560 last_return=-69.2 (+1 eps) [worker 1] episodes_seen=570 last_return=-93.0 (+1 eps) [worker 0] episodes_seen=560 last_return=-100.0 (+1 eps) [A2C][sync] it= 1669 steps= 200280 (+120) avg10=-142.46 loss=316.918 pg=-0.213 vf=528.568 H=0.656 gn=1382.966 [worker 2] episodes_seen=570 last_return=-103.8 (+1 eps) [worker 3] episodes_seen=570 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=580 last_return=-80.0 (+1 eps) [worker 0] episodes_seen=570 last_return=-115.8 (+1 eps) [worker 2] episodes_seen=580 last_return=-84.5 (+1 eps) [worker 3] episodes_seen=580 last_return=-85.0 (+1 eps) [worker 1] episodes_seen=590 last_return=-96.1 (+1 eps) [worker 0] episodes_seen=580 last_return=-87.7 (+1 eps) [worker 2] episodes_seen=590 last_return=-108.4 (+1 eps) [worker 3] episodes_seen=590 last_return=-101.2 (+1 eps) 
[worker 1] episodes_seen=600 last_return=-121.7 (+1 eps) [worker 0] episodes_seen=590 last_return=-148.4 (+1 eps) [worker 2] episodes_seen=600 last_return=-77.9 (+1 eps) [worker 3] episodes_seen=600 last_return=-125.6 (+1 eps) [worker 0] episodes_seen=600 last_return=-104.0 (+1 eps) [worker 1] episodes_seen=610 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=610 last_return=-89.7 (+1 eps) [worker 3] episodes_seen=610 last_return=-105.0 (+1 eps) [worker 0] episodes_seen=610 last_return=-57.1 (+1 eps) [worker 1] episodes_seen=620 last_return=-65.7 (+1 eps) [worker 2] episodes_seen=620 last_return=-118.9 (+1 eps) [worker 3] episodes_seen=620 last_return=-92.3 (+1 eps) [worker 0] episodes_seen=620 last_return=-74.7 (+1 eps) [worker 1] episodes_seen=630 last_return=-90.3 (+1 eps) [worker 2] episodes_seen=630 last_return=-71.7 (+1 eps) [worker 3] episodes_seen=630 last_return=33.4 (+1 eps) [worker 0] episodes_seen=630 last_return=-81.9 (+1 eps) [worker 1] episodes_seen=640 last_return=-62.5 (+1 eps) [worker 2] episodes_seen=640 last_return=-121.1 (+1 eps) [worker 3] episodes_seen=640 last_return=135.4 (+1 eps) [worker 0] episodes_seen=640 last_return=-95.1 (+1 eps) [worker 1] episodes_seen=650 last_return=-80.6 (+1 eps) [worker 2] episodes_seen=650 last_return=-73.9 (+1 eps) [worker 3] episodes_seen=650 last_return=-91.0 (+1 eps) [worker 0] episodes_seen=650 last_return=17.2 (+1 eps) [worker 2] episodes_seen=660 last_return=-102.4 (+1 eps) [worker 1] episodes_seen=660 last_return=-81.1 (+1 eps) [worker 3] episodes_seen=660 last_return=-102.7 (+1 eps) [worker 1] episodes_seen=670 last_return=-93.6 (+1 eps) [worker 0] episodes_seen=660 last_return=63.4 (+1 eps) [worker 2] episodes_seen=670 last_return=-65.8 (+1 eps) [worker 3] episodes_seen=670 last_return=-80.1 (+1 eps) [worker 0] episodes_seen=670 last_return=-44.2 (+1 eps) [worker 3] episodes_seen=680 last_return=-74.5 (+1 eps) [worker 2] episodes_seen=680 last_return=-57.9 (+1 eps) [worker 1] episodes_seen=680 
last_return=-76.2 (+1 eps) [worker 2] episodes_seen=690 last_return=81.6 (+1 eps) [worker 0] episodes_seen=680 last_return=-14.7 (+1 eps) [worker 3] episodes_seen=690 last_return=-57.6 (+1 eps) [worker 1] episodes_seen=690 last_return=-84.8 (+1 eps) [worker 0] episodes_seen=690 last_return=-87.6 (+1 eps) [worker 2] episodes_seen=700 last_return=-204.4 (+1 eps) [worker 3] episodes_seen=700 last_return=-82.5 (+1 eps) [worker 1] episodes_seen=700 last_return=-95.1 (+1 eps) [worker 0] episodes_seen=700 last_return=-152.6 (+1 eps) [worker 3] episodes_seen=710 last_return=-43.4 (+1 eps) [worker 2] episodes_seen=710 last_return=-152.9 (+1 eps) [worker 1] episodes_seen=710 last_return=-94.3 (+1 eps) [worker 0] episodes_seen=710 last_return=-147.8 (+1 eps) [worker 3] episodes_seen=720 last_return=-79.7 (+1 eps) [worker 1] episodes_seen=720 last_return=-55.8 (+1 eps) [worker 2] episodes_seen=720 last_return=-189.3 (+1 eps) [A2C][sync] it= 2086 steps= 250320 (+120) avg10= -95.76 loss=780.494 pg=-0.042 vf=1300.919 H=1.025 gn=899.097 [worker 3] episodes_seen=730 last_return=-141.8 (+1 eps) [worker 0] episodes_seen=720 last_return=-60.7 (+1 eps) [worker 1] episodes_seen=730 last_return=-85.0 (+1 eps) [worker 2] episodes_seen=730 last_return=-75.2 (+1 eps) [worker 0] episodes_seen=730 last_return=-129.8 (+1 eps) [worker 3] episodes_seen=740 last_return=-87.2 (+1 eps) [worker 1] episodes_seen=740 last_return=-184.9 (+1 eps) [worker 2] episodes_seen=740 last_return=-175.9 (+1 eps) [worker 3] episodes_seen=750 last_return=-149.3 (+1 eps) [worker 1] episodes_seen=750 last_return=-129.9 (+1 eps) [worker 0] episodes_seen=740 last_return=-123.3 (+1 eps) [worker 2] episodes_seen=750 last_return=-116.9 (+1 eps) [worker 3] episodes_seen=760 last_return=-85.4 (+1 eps) [worker 1] episodes_seen=760 last_return=-124.6 (+1 eps) [worker 0] episodes_seen=750 last_return=-169.9 (+1 eps) [worker 2] episodes_seen=760 last_return=-141.8 (+1 eps) [worker 3] episodes_seen=770 last_return=-68.8 (+1 eps) 
[worker 1] episodes_seen=770 last_return=-68.1 (+1 eps) [worker 0] episodes_seen=760 last_return=-96.4 (+1 eps) [worker 2] episodes_seen=770 last_return=-86.2 (+1 eps) [worker 3] episodes_seen=780 last_return=-172.6 (+1 eps) [worker 1] episodes_seen=780 last_return=-112.1 (+1 eps) [worker 0] episodes_seen=770 last_return=-82.6 (+1 eps) [worker 2] episodes_seen=780 last_return=-151.6 (+1 eps) [worker 3] episodes_seen=790 last_return=-111.9 (+1 eps) [worker 1] episodes_seen=790 last_return=-183.3 (+1 eps) [worker 3] episodes_seen=800 last_return=-153.0 (+1 eps) [worker 2] episodes_seen=790 last_return=-171.1 (+1 eps) [worker 1] episodes_seen=800 last_return=-81.9 (+1 eps) [worker 0] episodes_seen=780 last_return=-72.7 (+1 eps) [worker 3] episodes_seen=810 last_return=-146.3 (+1 eps) [worker 2] episodes_seen=800 last_return=-121.7 (+1 eps) [worker 1] episodes_seen=810 last_return=-104.3 (+1 eps) [worker 0] episodes_seen=790 last_return=-219.6 (+1 eps) [worker 2] episodes_seen=810 last_return=-67.9 (+1 eps) [worker 1] episodes_seen=820 last_return=-96.0 (+1 eps) [worker 0] episodes_seen=800 last_return=-174.7 (+1 eps) [worker 3] episodes_seen=820 last_return=-180.5 (+1 eps) [worker 2] episodes_seen=820 last_return=-0.5 (+1 eps) [worker 1] episodes_seen=830 last_return=-107.5 (+1 eps) [worker 0] episodes_seen=810 last_return=-34.6 (+1 eps) [worker 3] episodes_seen=830 last_return=-114.6 (+1 eps) [worker 2] episodes_seen=830 last_return=-186.7 (+1 eps) [worker 1] episodes_seen=840 last_return=-93.2 (+1 eps) [worker 0] episodes_seen=820 last_return=-127.8 (+1 eps) [A2C][sync] it= 2503 steps= 300360 (+120) avg10=-160.07 loss=1340.182 pg=-0.187 vf=2233.977 H=1.187 gn=1943.215 [worker 3] episodes_seen=840 last_return=-114.3 (+1 eps) [worker 2] episodes_seen=840 last_return=-83.1 (+1 eps) [worker 1] episodes_seen=850 last_return=-257.1 (+1 eps) [worker 0] episodes_seen=830 last_return=-115.3 (+1 eps) [worker 3] episodes_seen=850 last_return=-53.2 (+1 eps) [worker 2] 
episodes_seen=850 last_return=-170.1 (+1 eps) [worker 1] episodes_seen=860 last_return=-89.5 (+1 eps) [worker 0] episodes_seen=840 last_return=-79.0 (+1 eps) [worker 3] episodes_seen=860 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=860 last_return=-86.1 (+1 eps) [worker 1] episodes_seen=870 last_return=-99.4 (+1 eps) [worker 0] episodes_seen=850 last_return=-63.0 (+1 eps) [worker 3] episodes_seen=870 last_return=-75.8 (+1 eps) [worker 2] episodes_seen=870 last_return=-86.0 (+1 eps) [worker 0] episodes_seen=860 last_return=-49.4 (+1 eps) [worker 1] episodes_seen=880 last_return=-42.2 (+1 eps) [worker 3] episodes_seen=880 last_return=-88.8 (+1 eps) [worker 2] episodes_seen=880 last_return=-134.1 (+1 eps) [worker 0] episodes_seen=870 last_return=-151.4 (+1 eps) [worker 1] episodes_seen=890 last_return=-355.7 (+1 eps) [worker 3] episodes_seen=890 last_return=-170.2 (+1 eps) [worker 2] episodes_seen=890 last_return=-197.8 (+1 eps) [worker 1] episodes_seen=900 last_return=-130.4 (+1 eps) [worker 0] episodes_seen=880 last_return=-124.9 (+1 eps) [worker 3] episodes_seen=900 last_return=-85.3 (+1 eps) [worker 2] episodes_seen=900 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=910 last_return=-106.4 (+1 eps) [worker 0] episodes_seen=890 last_return=-104.5 (+1 eps) [worker 3] episodes_seen=910 last_return=-87.7 (+1 eps) [worker 2] episodes_seen=910 last_return=-235.7 (+1 eps) [worker 0] episodes_seen=900 last_return=-188.3 (+1 eps) [worker 1] episodes_seen=920 last_return=-160.2 (+1 eps) [worker 3] episodes_seen=920 last_return=-232.4 (+1 eps) [worker 2] episodes_seen=920 last_return=-327.7 (+1 eps) [worker 0] episodes_seen=910 last_return=-154.9 (+1 eps) [worker 1] episodes_seen=930 last_return=-279.0 (+1 eps) [worker 3] episodes_seen=930 last_return=-256.0 (+1 eps) [worker 2] episodes_seen=930 last_return=-95.3 (+1 eps) [worker 0] episodes_seen=920 last_return=-166.2 (+1 eps) [worker 1] episodes_seen=940 last_return=-74.2 (+1 eps) [worker 3] episodes_seen=940 
last_return=-177.7 (+1 eps) [worker 2] episodes_seen=940 last_return=-121.6 (+1 eps) [worker 0] episodes_seen=930 last_return=-261.0 (+1 eps) [worker 1] episodes_seen=950 last_return=-88.3 (+1 eps) [worker 3] episodes_seen=950 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=950 last_return=-145.0 (+1 eps) [worker 0] episodes_seen=940 last_return=-102.9 (+1 eps) [worker 1] episodes_seen=960 last_return=-125.7 (+1 eps) [worker 3] episodes_seen=960 last_return=-69.6 (+1 eps) [worker 2] episodes_seen=960 last_return=-1.8 (+1 eps) [worker 0] episodes_seen=950 last_return=-86.5 (+1 eps) [worker 1] episodes_seen=970 last_return=-61.5 (+1 eps) [worker 3] episodes_seen=970 last_return=-76.2 (+1 eps) [worker 0] episodes_seen=960 last_return=-169.0 (+1 eps) [worker 2] episodes_seen=970 last_return=-101.8 (+1 eps) [worker 1] episodes_seen=980 last_return=-37.9 (+1 eps) [worker 3] episodes_seen=980 last_return=-95.0 (+1 eps) [worker 2] episodes_seen=980 last_return=-181.2 (+1 eps) [worker 0] episodes_seen=970 last_return=-161.2 (+1 eps) [A2C][sync] it= 2920 steps= 350400 (+120) avg10=-184.45 loss=1392.512 pg=0.076 vf=2320.747 H=0.786 gn=3699.871 [worker 1] episodes_seen=990 last_return=-171.7 (+1 eps) [worker 3] episodes_seen=990 last_return=-112.9 (+1 eps) [worker 0] episodes_seen=980 last_return=-86.9 (+1 eps) [worker 2] episodes_seen=990 last_return=-68.5 (+1 eps) [worker 1] episodes_seen=1000 last_return=-54.1 (+1 eps) [worker 3] episodes_seen=1000 last_return=-61.7 (+1 eps) [worker 0] episodes_seen=990 last_return=-88.4 (+1 eps) [worker 2] episodes_seen=1000 last_return=-38.3 (+1 eps) [worker 1] episodes_seen=1010 last_return=-54.6 (+1 eps) [worker 3] episodes_seen=1010 last_return=-156.3 (+1 eps) [worker 0] episodes_seen=1000 last_return=-74.5 (+1 eps) [worker 2] episodes_seen=1010 last_return=-83.3 (+1 eps) [worker 1] episodes_seen=1020 last_return=-7.1 (+1 eps) [worker 3] episodes_seen=1020 last_return=-119.5 (+1 eps) [worker 0] episodes_seen=1010 
last_return=-130.9 (+1 eps) [worker 2] episodes_seen=1020 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1030 last_return=-81.0 (+1 eps) [worker 3] episodes_seen=1030 last_return=-98.4 (+1 eps) [worker 0] episodes_seen=1020 last_return=23.8 (+1 eps) [worker 2] episodes_seen=1030 last_return=104.6 (+1 eps) [worker 1] episodes_seen=1040 last_return=-130.2 (+1 eps) [worker 3] episodes_seen=1040 last_return=-409.4 (+1 eps) [worker 0] episodes_seen=1030 last_return=-140.5 (+1 eps) [worker 2] episodes_seen=1040 last_return=-375.5 (+1 eps) [worker 1] episodes_seen=1050 last_return=-307.0 (+1 eps) [worker 3] episodes_seen=1050 last_return=-275.4 (+1 eps) [worker 0] episodes_seen=1040 last_return=-154.0 (+1 eps) [worker 2] episodes_seen=1050 last_return=-301.5 (+1 eps) [worker 1] episodes_seen=1060 last_return=-280.6 (+1 eps) [worker 3] episodes_seen=1060 last_return=-285.0 (+1 eps) [worker 0] episodes_seen=1050 last_return=-152.1 (+1 eps) [worker 2] episodes_seen=1060 last_return=-183.8 (+1 eps) [worker 1] episodes_seen=1070 last_return=-140.1 (+1 eps) [worker 3] episodes_seen=1070 last_return=-126.0 (+1 eps) [worker 0] episodes_seen=1060 last_return=-278.8 (+1 eps) [worker 2] episodes_seen=1070 last_return=-493.0 (+1 eps) [worker 1] episodes_seen=1080 last_return=-366.5 (+1 eps) [worker 3] episodes_seen=1080 last_return=-335.0 (+1 eps) [worker 0] episodes_seen=1070 last_return=-123.2 (+1 eps) [worker 2] episodes_seen=1080 last_return=-369.7 (+1 eps) [worker 3] episodes_seen=1090 last_return=-248.2 (+1 eps) [worker 1] episodes_seen=1090 last_return=-164.0 (+1 eps) [worker 0] episodes_seen=1080 last_return=-384.2 (+1 eps) [worker 2] episodes_seen=1090 last_return=-178.8 (+1 eps) [worker 1] episodes_seen=1100 last_return=-228.5 (+1 eps) [worker 3] episodes_seen=1100 last_return=-314.5 (+1 eps) [worker 0] episodes_seen=1090 last_return=-435.2 (+1 eps) [worker 2] episodes_seen=1100 last_return=-355.2 (+1 eps) [worker 3] episodes_seen=1110 last_return=-204.4 (+1 eps) 
[worker 1] episodes_seen=1110 last_return=-368.2 (+1 eps) [worker 0] episodes_seen=1100 last_return=-368.0 (+1 eps) [worker 2] episodes_seen=1110 last_return=-28.7 (+1 eps) [worker 3] episodes_seen=1120 last_return=-240.7 (+1 eps) [worker 1] episodes_seen=1120 last_return=-375.3 (+1 eps) [worker 0] episodes_seen=1110 last_return=-191.3 (+1 eps) [worker 2] episodes_seen=1120 last_return=-119.9 (+1 eps) [worker 3] episodes_seen=1130 last_return=-341.8 (+1 eps) [worker 1] episodes_seen=1130 last_return=-214.9 (+1 eps) [worker 0] episodes_seen=1120 last_return=-280.3 (+1 eps) [worker 2] episodes_seen=1130 last_return=-92.7 (+1 eps) [worker 3] episodes_seen=1140 last_return=-444.4 (+1 eps) [worker 1] episodes_seen=1140 last_return=-171.8 (+1 eps) [worker 0] episodes_seen=1130 last_return=-260.8 (+1 eps) [worker 2] episodes_seen=1140 last_return=-265.2 (+1 eps) [worker 3] episodes_seen=1150 last_return=-190.1 (+1 eps) [worker 1] episodes_seen=1150 last_return=-134.3 (+1 eps) [worker 0] episodes_seen=1140 last_return=-170.4 (+1 eps) [A2C][sync] it= 3337 steps= 400440 (+120) avg10=-216.29 loss=2788.387 pg=0.002 vf=4647.310 H=0.008 gn=3840.502 [worker 2] episodes_seen=1150 last_return=-333.5 (+1 eps) [worker 3] episodes_seen=1160 last_return=-324.9 (+1 eps) [worker 1] episodes_seen=1160 last_return=-512.3 (+1 eps) [worker 0] episodes_seen=1150 last_return=-185.7 (+1 eps) [worker 2] episodes_seen=1160 last_return=-203.7 (+1 eps) [worker 3] episodes_seen=1170 last_return=-172.7 (+1 eps) [worker 1] episodes_seen=1170 last_return=-254.2 (+1 eps) [worker 0] episodes_seen=1160 last_return=-151.6 (+1 eps) [worker 2] episodes_seen=1170 last_return=-443.1 (+1 eps) [worker 3] episodes_seen=1180 last_return=-165.9 (+1 eps) [worker 1] episodes_seen=1180 last_return=-140.7 (+1 eps) [worker 0] episodes_seen=1170 last_return=-372.1 (+1 eps) [worker 2] episodes_seen=1180 last_return=-129.0 (+1 eps) [worker 3] episodes_seen=1190 last_return=-307.3 (+1 eps) [worker 1] episodes_seen=1190 
last_return=-257.0 (+1 eps) [worker 0] episodes_seen=1180 last_return=-153.1 (+1 eps) [worker 2] episodes_seen=1190 last_return=-176.8 (+1 eps) [worker 3] episodes_seen=1200 last_return=-57.6 (+1 eps) [worker 1] episodes_seen=1200 last_return=-243.9 (+1 eps) [worker 0] episodes_seen=1190 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1200 last_return=-107.4 (+1 eps) [worker 1] episodes_seen=1210 last_return=-142.6 (+1 eps) [worker 3] episodes_seen=1210 last_return=-1.0 (+1 eps) [worker 0] episodes_seen=1200 last_return=-60.0 (+1 eps) [worker 2] episodes_seen=1210 last_return=-148.3 (+1 eps) [worker 1] episodes_seen=1220 last_return=-122.7 (+1 eps) [worker 3] episodes_seen=1220 last_return=-104.0 (+1 eps) [worker 0] episodes_seen=1210 last_return=-116.0 (+1 eps) [worker 2] episodes_seen=1220 last_return=-42.1 (+1 eps) [worker 3] episodes_seen=1230 last_return=-186.9 (+1 eps) [worker 1] episodes_seen=1230 last_return=-40.7 (+1 eps) [worker 0] episodes_seen=1220 last_return=-102.7 (+1 eps) [worker 2] episodes_seen=1230 last_return=-94.0 (+1 eps) [worker 3] episodes_seen=1240 last_return=-81.6 (+1 eps) [worker 1] episodes_seen=1240 last_return=-45.4 (+1 eps) [worker 0] episodes_seen=1230 last_return=-349.3 (+1 eps) [worker 2] episodes_seen=1240 last_return=-28.4 (+1 eps) [worker 3] episodes_seen=1250 last_return=-141.5 (+1 eps) [worker 1] episodes_seen=1250 last_return=-78.5 (+1 eps) [worker 0] episodes_seen=1240 last_return=-94.0 (+1 eps) [worker 2] episodes_seen=1250 last_return=-55.9 (+1 eps) [worker 1] episodes_seen=1260 last_return=-63.4 (+1 eps) [worker 3] episodes_seen=1260 last_return=30.0 (+1 eps) [worker 0] episodes_seen=1250 last_return=-319.3 (+1 eps) [worker 1] episodes_seen=1270 last_return=-146.3 (+1 eps) [worker 2] episodes_seen=1260 last_return=-5.0 (+1 eps) [worker 3] episodes_seen=1270 last_return=-127.3 (+1 eps) [worker 0] episodes_seen=1260 last_return=-128.5 (+1 eps) [worker 1] episodes_seen=1280 last_return=-146.8 (+1 eps) [worker 3] 
episodes_seen=1280 last_return=-313.1 (+1 eps) [worker 2] episodes_seen=1270 last_return=-26.1 (+1 eps) [worker 0] episodes_seen=1270 last_return=-388.8 (+1 eps) [worker 1] episodes_seen=1290 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=1290 last_return=-84.8 (+1 eps) [worker 2] episodes_seen=1280 last_return=-115.2 (+1 eps) [worker 0] episodes_seen=1280 last_return=-252.9 (+1 eps) [worker 1] episodes_seen=1300 last_return=-116.9 (+1 eps) [worker 2] episodes_seen=1290 last_return=51.6 (+1 eps) [worker 3] episodes_seen=1300 last_return=-332.3 (+1 eps) [worker 0] episodes_seen=1290 last_return=-100.4 (+1 eps) [worker 1] episodes_seen=1310 last_return=-53.3 (+1 eps) [worker 3] episodes_seen=1310 last_return=-63.6 (+1 eps) [worker 2] episodes_seen=1300 last_return=-62.9 (+1 eps) [worker 0] episodes_seen=1300 last_return=-91.3 (+1 eps) [worker 1] episodes_seen=1320 last_return=-38.4 (+1 eps) [worker 3] episodes_seen=1320 last_return=-111.1 (+1 eps) [worker 2] episodes_seen=1310 last_return=-208.8 (+1 eps) [A2C][sync] it= 3754 steps= 450480 (+120) avg10=-170.33 loss=74.932 pg=-0.036 vf=124.977 H=1.183 gn=356.315 [worker 0] episodes_seen=1310 last_return=-92.8 (+1 eps) [worker 1] episodes_seen=1330 last_return=-103.4 (+1 eps) [worker 3] episodes_seen=1330 last_return=-68.2 (+1 eps) [worker 2] episodes_seen=1320 last_return=-50.5 (+1 eps) [worker 0] episodes_seen=1320 last_return=-47.0 (+1 eps) [worker 1] episodes_seen=1340 last_return=-100.5 (+1 eps) [worker 3] episodes_seen=1340 last_return=-23.6 (+1 eps) [worker 2] episodes_seen=1330 last_return=-225.2 (+1 eps) [worker 0] episodes_seen=1330 last_return=-69.8 (+1 eps) [worker 1] episodes_seen=1350 last_return=-55.8 (+1 eps) [worker 3] episodes_seen=1350 last_return=-22.7 (+1 eps) [worker 2] episodes_seen=1340 last_return=-113.4 (+1 eps) [worker 0] episodes_seen=1340 last_return=-104.6 (+1 eps) [worker 1] episodes_seen=1360 last_return=-90.7 (+1 eps) [worker 2] episodes_seen=1350 last_return=-46.5 (+1 eps) [worker 
3] episodes_seen=1360 last_return=-84.8 (+1 eps) [worker 1] episodes_seen=1370 last_return=-209.0 (+1 eps) [worker 0] episodes_seen=1350 last_return=-230.6 (+1 eps) [worker 2] episodes_seen=1360 last_return=-90.4 (+1 eps) [worker 3] episodes_seen=1370 last_return=-110.2 (+1 eps) [worker 0] episodes_seen=1360 last_return=-118.8 (+1 eps) [worker 1] episodes_seen=1380 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1370 last_return=-127.3 (+1 eps) [worker 3] episodes_seen=1380 last_return=-220.3 (+1 eps) [worker 0] episodes_seen=1370 last_return=-74.2 (+1 eps) [worker 1] episodes_seen=1390 last_return=-157.2 (+1 eps) [worker 2] episodes_seen=1380 last_return=-98.4 (+1 eps) [worker 3] episodes_seen=1390 last_return=-86.9 (+1 eps) [worker 0] episodes_seen=1380 last_return=-71.7 (+1 eps) [worker 1] episodes_seen=1400 last_return=-83.6 (+1 eps) [worker 2] episodes_seen=1390 last_return=-107.0 (+1 eps) [worker 3] episodes_seen=1400 last_return=-126.9 (+1 eps) [worker 0] episodes_seen=1390 last_return=-39.6 (+1 eps) [worker 1] episodes_seen=1410 last_return=-111.3 (+1 eps) [worker 2] episodes_seen=1400 last_return=-52.3 (+1 eps) [worker 3] episodes_seen=1410 last_return=-47.1 (+1 eps) [worker 0] episodes_seen=1400 last_return=-81.8 (+1 eps) [worker 1] episodes_seen=1420 last_return=-157.3 (+1 eps) [worker 2] episodes_seen=1410 last_return=-28.5 (+1 eps) [worker 3] episodes_seen=1420 last_return=-123.7 (+1 eps) [worker 0] episodes_seen=1410 last_return=-194.7 (+1 eps) [worker 1] episodes_seen=1430 last_return=25.4 (+1 eps) [worker 2] episodes_seen=1420 last_return=-297.2 (+1 eps) [worker 3] episodes_seen=1430 last_return=-328.4 (+1 eps) [worker 0] episodes_seen=1420 last_return=-302.8 (+1 eps) [worker 1] episodes_seen=1440 last_return=-187.0 (+1 eps) [worker 2] episodes_seen=1430 last_return=-309.4 (+1 eps) [worker 3] episodes_seen=1440 last_return=-156.3 (+1 eps) [worker 0] episodes_seen=1430 last_return=-428.4 (+1 eps) [worker 1] episodes_seen=1450 last_return=-292.1 
(+1 eps) [worker 2] episodes_seen=1440 last_return=-262.3 (+1 eps) [worker 3] episodes_seen=1450 last_return=-181.6 (+1 eps) [worker 0] episodes_seen=1440 last_return=-196.0 (+1 eps) [worker 1] episodes_seen=1460 last_return=-327.1 (+1 eps) [worker 2] episodes_seen=1450 last_return=-286.9 (+1 eps) [worker 3] episodes_seen=1460 last_return=-126.4 (+1 eps) [worker 0] episodes_seen=1450 last_return=-287.8 (+1 eps) [A2C][sync] it= 4171 steps= 500520 (+120) avg10=-297.79 loss=2949.549 pg=0.081 vf=4915.787 H=0.298 gn=4980.084 [worker 2] episodes_seen=1460 last_return=-298.7 (+1 eps) [worker 1] episodes_seen=1470 last_return=-109.6 (+1 eps) [worker 0] episodes_seen=1460 last_return=-244.4 (+1 eps) [worker 3] episodes_seen=1470 last_return=-80.0 (+1 eps) [worker 1] episodes_seen=1480 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1470 last_return=-435.5 (+1 eps) [worker 0] episodes_seen=1470 last_return=-197.3 (+1 eps) [worker 3] episodes_seen=1480 last_return=-163.4 (+1 eps) [worker 1] episodes_seen=1490 last_return=-216.4 (+1 eps) [worker 2] episodes_seen=1480 last_return=-80.0 (+1 eps) [worker 0] episodes_seen=1480 last_return=-288.2 (+1 eps) [worker 3] episodes_seen=1490 last_return=-307.3 (+1 eps) [worker 2] episodes_seen=1490 last_return=-321.6 (+1 eps) [worker 1] episodes_seen=1500 last_return=-243.5 (+1 eps) [worker 0] episodes_seen=1490 last_return=-233.9 (+1 eps) [worker 3] episodes_seen=1500 last_return=-252.5 (+1 eps) [worker 1] episodes_seen=1510 last_return=6.5 (+1 eps) [worker 2] episodes_seen=1500 last_return=-170.0 (+1 eps) [worker 0] episodes_seen=1500 last_return=-153.3 (+1 eps) [worker 3] episodes_seen=1510 last_return=-100.1 (+1 eps) [worker 2] episodes_seen=1510 last_return=-136.9 (+1 eps) [worker 1] episodes_seen=1520 last_return=-152.8 (+1 eps) [worker 0] episodes_seen=1510 last_return=-108.4 (+1 eps) [worker 3] episodes_seen=1520 last_return=-131.6 (+1 eps) [worker 2] episodes_seen=1520 last_return=-373.6 (+1 eps) [worker 1] 
episodes_seen=1530 last_return=-241.0 (+1 eps) [worker 0] episodes_seen=1520 last_return=-135.7 (+1 eps) [worker 3] episodes_seen=1530 last_return=-249.6 (+1 eps) [worker 2] episodes_seen=1530 last_return=-410.8 (+1 eps) [worker 1] episodes_seen=1540 last_return=-121.2 (+1 eps) [worker 0] episodes_seen=1530 last_return=-281.4 (+1 eps) [worker 3] episodes_seen=1540 last_return=-310.4 (+1 eps) [worker 2] episodes_seen=1540 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1550 last_return=-219.9 (+1 eps) [worker 0] episodes_seen=1540 last_return=-151.6 (+1 eps) [worker 3] episodes_seen=1550 last_return=-170.2 (+1 eps) [worker 2] episodes_seen=1550 last_return=-111.3 (+1 eps) [worker 1] episodes_seen=1560 last_return=-13.8 (+1 eps) [worker 0] episodes_seen=1550 last_return=53.6 (+1 eps) [worker 3] episodes_seen=1560 last_return=-102.3 (+1 eps) [worker 2] episodes_seen=1560 last_return=-51.2 (+1 eps) [worker 1] episodes_seen=1570 last_return=-85.6 (+1 eps) [worker 0] episodes_seen=1560 last_return=-70.4 (+1 eps) [worker 3] episodes_seen=1570 last_return=-246.9 (+1 eps) [worker 2] episodes_seen=1570 last_return=-137.2 (+1 eps) [worker 1] episodes_seen=1580 last_return=-226.5 (+1 eps) [worker 0] episodes_seen=1570 last_return=-251.0 (+1 eps) [worker 3] episodes_seen=1580 last_return=-179.4 (+1 eps) [worker 2] episodes_seen=1580 last_return=-94.4 (+1 eps) [worker 1] episodes_seen=1590 last_return=-94.7 (+1 eps) [worker 0] episodes_seen=1580 last_return=-88.0 (+1 eps) [worker 3] episodes_seen=1590 last_return=-47.1 (+1 eps) [worker 2] episodes_seen=1590 last_return=-67.2 (+1 eps) [worker 1] episodes_seen=1600 last_return=-115.5 (+1 eps) [worker 0] episodes_seen=1590 last_return=-132.2 (+1 eps) [worker 3] episodes_seen=1600 last_return=-125.0 (+1 eps) [worker 2] episodes_seen=1600 last_return=-93.0 (+1 eps) [worker 1] episodes_seen=1610 last_return=-80.4 (+1 eps) [worker 0] episodes_seen=1600 last_return=-106.6 (+1 eps) [worker 3] episodes_seen=1610 last_return=-41.5 (+1 
eps) [worker 2] episodes_seen=1610 last_return=-90.5 (+1 eps) [worker 1] episodes_seen=1620 last_return=-103.9 (+1 eps) [worker 0] episodes_seen=1610 last_return=-108.6 (+1 eps) [worker 3] episodes_seen=1620 last_return=-90.9 (+1 eps) [worker 2] episodes_seen=1620 last_return=-93.7 (+1 eps) [A2C][sync] it= 4588 steps= 550560 (+120) avg10= -82.04 loss=127.197 pg=0.013 vf=211.999 H=1.000 gn=334.972 [worker 1] episodes_seen=1630 last_return=-167.4 (+1 eps) [worker 0] episodes_seen=1620 last_return=-92.9 (+1 eps) [worker 3] episodes_seen=1630 last_return=-164.7 (+1 eps) [worker 2] episodes_seen=1630 last_return=-295.6 (+1 eps) [worker 0] episodes_seen=1630 last_return=-182.6 (+1 eps) [worker 1] episodes_seen=1640 last_return=-471.7 (+1 eps) [worker 3] episodes_seen=1640 last_return=-107.1 (+1 eps) [worker 2] episodes_seen=1640 last_return=-106.5 (+1 eps) [worker 1] episodes_seen=1650 last_return=-185.6 (+1 eps) [worker 0] episodes_seen=1640 last_return=-291.5 (+1 eps) [worker 3] episodes_seen=1650 last_return=-267.8 (+1 eps) [worker 2] episodes_seen=1650 last_return=-124.5 (+1 eps) [worker 1] episodes_seen=1660 last_return=-312.7 (+1 eps) [worker 0] episodes_seen=1650 last_return=-121.9 (+1 eps) [worker 3] episodes_seen=1660 last_return=-283.2 (+1 eps) [worker 2] episodes_seen=1660 last_return=-427.4 (+1 eps) [worker 1] episodes_seen=1670 last_return=-404.5 (+1 eps) [worker 0] episodes_seen=1660 last_return=-130.8 (+1 eps) [worker 3] episodes_seen=1670 last_return=-285.9 (+1 eps) [worker 2] episodes_seen=1670 last_return=-204.1 (+1 eps) [worker 1] episodes_seen=1680 last_return=-334.5 (+1 eps) [worker 0] episodes_seen=1670 last_return=-184.1 (+1 eps) [worker 2] episodes_seen=1680 last_return=-92.5 (+1 eps) [worker 3] episodes_seen=1680 last_return=-130.6 (+1 eps) [worker 1] episodes_seen=1690 last_return=-111.2 (+1 eps) [worker 0] episodes_seen=1680 last_return=-91.5 (+1 eps) [worker 3] episodes_seen=1690 last_return=-85.1 (+1 eps) [worker 2] episodes_seen=1690 
last_return=-84.9 (+1 eps) [worker 0] episodes_seen=1690 last_return=-58.7 (+1 eps) [worker 1] episodes_seen=1700 last_return=-204.8 (+1 eps) [worker 3] episodes_seen=1700 last_return=-263.0 (+1 eps) [worker 2] episodes_seen=1700 last_return=-177.1 (+1 eps) [worker 0] episodes_seen=1700 last_return=-88.3 (+1 eps) [worker 1] episodes_seen=1710 last_return=-55.7 (+1 eps) [worker 3] episodes_seen=1710 last_return=-180.5 (+1 eps) [worker 2] episodes_seen=1710 last_return=-137.7 (+1 eps) [worker 0] episodes_seen=1710 last_return=-56.5 (+1 eps) [worker 1] episodes_seen=1720 last_return=-60.2 (+1 eps) [worker 3] episodes_seen=1720 last_return=-102.2 (+1 eps) [worker 2] episodes_seen=1720 last_return=-116.8 (+1 eps) [worker 0] episodes_seen=1720 last_return=-86.9 (+1 eps) [worker 1] episodes_seen=1730 last_return=-43.8 (+1 eps) [worker 3] episodes_seen=1730 last_return=-145.4 (+1 eps) [worker 0] episodes_seen=1730 last_return=-68.8 (+1 eps) [worker 2] episodes_seen=1730 last_return=-95.9 (+1 eps) [worker 1] episodes_seen=1740 last_return=-54.1 (+1 eps) [worker 3] episodes_seen=1740 last_return=-349.5 (+1 eps) [worker 2] episodes_seen=1740 last_return=-228.1 (+1 eps) [worker 0] episodes_seen=1740 last_return=-196.3 (+1 eps) [worker 1] episodes_seen=1750 last_return=-315.8 (+1 eps) [worker 3] episodes_seen=1750 last_return=-336.6 (+1 eps) [worker 2] episodes_seen=1750 last_return=-307.1 (+1 eps) [worker 0] episodes_seen=1750 last_return=-123.4 (+1 eps) [worker 1] episodes_seen=1760 last_return=-287.9 (+1 eps) [worker 3] episodes_seen=1760 last_return=-151.7 (+1 eps) [A2C][sync] it= 5005 steps= 600600 (+120) avg10=-154.52 loss=3185.228 pg=-0.128 vf=5308.950 H=1.010 gn=7219.294 [worker 2] episodes_seen=1760 last_return=-70.3 (+1 eps) [worker 0] episodes_seen=1760 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1770 last_return=-105.7 (+1 eps) [worker 3] episodes_seen=1770 last_return=-206.7 (+1 eps) [worker 0] episodes_seen=1770 last_return=-90.6 (+1 eps) [worker 2] 
episodes_seen=1770 last_return=-58.5 (+1 eps) [worker 1] episodes_seen=1780 last_return=-105.0 (+1 eps) [worker 3] episodes_seen=1780 last_return=-95.0 (+1 eps) [worker 0] episodes_seen=1780 last_return=-90.3 (+1 eps) [worker 2] episodes_seen=1780 last_return=-82.2 (+1 eps) [worker 1] episodes_seen=1790 last_return=-84.3 (+1 eps) [worker 3] episodes_seen=1790 last_return=-86.8 (+1 eps) [worker 0] episodes_seen=1790 last_return=-75.2 (+1 eps) [worker 2] episodes_seen=1790 last_return=-111.0 (+1 eps) [worker 1] episodes_seen=1800 last_return=-75.2 (+1 eps) [worker 3] episodes_seen=1800 last_return=-87.9 (+1 eps) [worker 0] episodes_seen=1800 last_return=-62.9 (+1 eps) [worker 2] episodes_seen=1800 last_return=-92.3 (+1 eps) [worker 1] episodes_seen=1810 last_return=-29.7 (+1 eps) [worker 3] episodes_seen=1810 last_return=-167.6 (+1 eps) [worker 0] episodes_seen=1810 last_return=-103.4 (+1 eps) [worker 1] episodes_seen=1820 last_return=-47.9 (+1 eps) [worker 2] episodes_seen=1810 last_return=-61.5 (+1 eps) [worker 3] episodes_seen=1820 last_return=-115.3 (+1 eps) [worker 0] episodes_seen=1820 last_return=-131.5 (+1 eps) [worker 1] episodes_seen=1830 last_return=-295.4 (+1 eps) [worker 2] episodes_seen=1820 last_return=-122.3 (+1 eps) [worker 3] episodes_seen=1830 last_return=-279.7 (+1 eps) [worker 0] episodes_seen=1830 last_return=-240.3 (+1 eps) [worker 2] episodes_seen=1830 last_return=-308.6 (+1 eps) [worker 1] episodes_seen=1840 last_return=-121.2 (+1 eps) [worker 3] episodes_seen=1840 last_return=-97.2 (+1 eps) [worker 0] episodes_seen=1840 last_return=-82.1 (+1 eps) [worker 2] episodes_seen=1840 last_return=-55.4 (+1 eps) [worker 1] episodes_seen=1850 last_return=-75.0 (+1 eps) [worker 3] episodes_seen=1850 last_return=-59.4 (+1 eps) [worker 0] episodes_seen=1850 last_return=-86.1 (+1 eps) [worker 2] episodes_seen=1850 last_return=-44.7 (+1 eps) [worker 3] episodes_seen=1860 last_return=-140.6 (+1 eps) [worker 0] episodes_seen=1860 last_return=-185.0 (+1 eps) 
[worker 1] episodes_seen=1860 last_return=-52.3 (+1 eps) [worker 2] episodes_seen=1860 last_return=-58.2 (+1 eps) [worker 3] episodes_seen=1870 last_return=-190.6 (+1 eps) [worker 0] episodes_seen=1870 last_return=-61.3 (+1 eps) [worker 1] episodes_seen=1870 last_return=-96.1 (+1 eps) [worker 2] episodes_seen=1870 last_return=-189.7 (+1 eps) [worker 3] episodes_seen=1880 last_return=-122.3 (+1 eps) [worker 0] episodes_seen=1880 last_return=-324.3 (+1 eps) [worker 2] episodes_seen=1880 last_return=-53.9 (+1 eps) [A2C][sync] it= 5422 steps= 650640 (+120) avg10= -81.15 loss=282.211 pg=0.011 vf=470.357 H=0.945 gn=1023.742 [worker 3] episodes_seen=1890 last_return=-106.7 (+1 eps) [worker 0] episodes_seen=1890 last_return=-86.9 (+1 eps) [worker 1] episodes_seen=1880 last_return=-100.7 (+1 eps) [worker 3] episodes_seen=1900 last_return=-58.1 (+1 eps) [worker 2] episodes_seen=1890 last_return=-112.7 (+1 eps) [worker 1] episodes_seen=1890 last_return=-155.5 (+1 eps) [worker 3] episodes_seen=1910 last_return=-30.3 (+1 eps) [worker 2] episodes_seen=1900 last_return=-352.7 (+1 eps) [worker 0] episodes_seen=1900 last_return=-139.8 (+1 eps) [worker 1] episodes_seen=1900 last_return=-178.9 (+1 eps) [worker 0] episodes_seen=1910 last_return=-72.3 (+1 eps) [worker 3] episodes_seen=1920 last_return=-152.8 (+1 eps) [worker 2] episodes_seen=1910 last_return=-111.6 (+1 eps) [worker 1] episodes_seen=1910 last_return=-94.0 (+1 eps) [worker 2] episodes_seen=1920 last_return=-124.2 (+1 eps) [worker 0] episodes_seen=1920 last_return=-34.7 (+1 eps) [worker 3] episodes_seen=1930 last_return=-121.2 (+1 eps) [worker 1] episodes_seen=1920 last_return=-79.3 (+1 eps) [worker 0] episodes_seen=1930 last_return=-76.9 (+1 eps) [worker 3] episodes_seen=1940 last_return=-95.3 (+1 eps) [worker 2] episodes_seen=1930 last_return=-56.4 (+1 eps) [worker 1] episodes_seen=1930 last_return=-74.2 (+1 eps) [worker 0] episodes_seen=1940 last_return=-318.1 (+1 eps) [worker 3] episodes_seen=1950 last_return=-240.7 
(+1 eps) [worker 2] episodes_seen=1940 last_return=-313.3 (+1 eps) [worker 1] episodes_seen=1940 last_return=-224.9 (+1 eps) [worker 3] episodes_seen=1960 last_return=-237.3 (+1 eps) [worker 0] episodes_seen=1950 last_return=-134.8 (+1 eps) [worker 2] episodes_seen=1950 last_return=-256.9 (+1 eps) [worker 1] episodes_seen=1950 last_return=-201.0 (+1 eps) [worker 3] episodes_seen=1970 last_return=4.9 (+1 eps) [worker 0] episodes_seen=1960 last_return=-239.1 (+1 eps) [worker 2] episodes_seen=1960 last_return=-79.4 (+1 eps) [worker 1] episodes_seen=1960 last_return=-66.8 (+1 eps) [worker 0] episodes_seen=1970 last_return=-74.7 (+1 eps) [worker 3] episodes_seen=1980 last_return=-79.0 (+1 eps) [worker 2] episodes_seen=1970 last_return=-74.5 (+1 eps) [worker 1] episodes_seen=1970 last_return=-50.1 (+1 eps) [worker 0] episodes_seen=1980 last_return=-81.3 (+1 eps) [worker 3] episodes_seen=1990 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1980 last_return=-107.1 (+1 eps) [worker 1] episodes_seen=1980 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1990 last_return=-63.8 (+1 eps) [worker 3] episodes_seen=2000 last_return=-84.1 (+1 eps) [worker 2] episodes_seen=1990 last_return=-105.8 (+1 eps) [A2C][sync] it= 5839 steps= 700680 (+120) avg10=-126.76 loss=62.905 pg=-0.039 vf=104.933 H=1.059 gn=407.999 [worker 0] episodes_seen=2000 last_return=-79.2 (+1 eps) [worker 1] episodes_seen=1990 last_return=-79.4 (+1 eps) [worker 3] episodes_seen=2010 last_return=48.9 (+1 eps) [worker 2] episodes_seen=2000 last_return=-84.4 (+1 eps) [worker 0] episodes_seen=2010 last_return=-88.2 (+1 eps) [worker 1] episodes_seen=2000 last_return=-84.0 (+1 eps) [worker 3] episodes_seen=2020 last_return=-76.8 (+1 eps) [worker 2] episodes_seen=2010 last_return=-93.8 (+1 eps) [worker 1] episodes_seen=2010 last_return=-90.9 (+1 eps) [worker 0] episodes_seen=2020 last_return=-28.9 (+1 eps) [worker 3] episodes_seen=2030 last_return=-115.2 (+1 eps) [worker 1] episodes_seen=2020 last_return=-161.9 
(+1 eps) [worker 0] episodes_seen=2030 last_return=-245.6 (+1 eps) [worker 3] episodes_seen=2040 last_return=-136.6 (+1 eps) [worker 2] episodes_seen=2020 last_return=-270.9 (+1 eps) [worker 1] episodes_seen=2030 last_return=-114.6 (+1 eps) [worker 0] episodes_seen=2040 last_return=-112.7 (+1 eps) [worker 3] episodes_seen=2050 last_return=-64.7 (+1 eps) [worker 2] episodes_seen=2030 last_return=35.0 (+1 eps) [worker 0] episodes_seen=2050 last_return=-49.8 (+1 eps) [worker 1] episodes_seen=2040 last_return=-55.3 (+1 eps) [worker 3] episodes_seen=2060 last_return=-81.9 (+1 eps) [worker 2] episodes_seen=2040 last_return=-65.5 (+1 eps) [worker 1] episodes_seen=2050 last_return=-295.9 (+1 eps) [worker 0] episodes_seen=2060 last_return=-335.3 (+1 eps) [worker 2] episodes_seen=2050 last_return=-114.2 (+1 eps) [worker 3] episodes_seen=2070 last_return=-271.7 (+1 eps) [worker 1] episodes_seen=2060 last_return=-366.5 (+1 eps) [worker 0] episodes_seen=2070 last_return=-116.8 (+1 eps) [worker 2] episodes_seen=2060 last_return=-328.9 (+1 eps) [worker 3] episodes_seen=2080 last_return=-220.8 (+1 eps) [worker 1] episodes_seen=2070 last_return=-306.3 (+1 eps) [worker 0] episodes_seen=2080 last_return=-287.0 (+1 eps) [worker 3] episodes_seen=2090 last_return=-307.5 (+1 eps) [worker 2] episodes_seen=2070 last_return=-230.9 (+1 eps) [worker 1] episodes_seen=2080 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2090 last_return=-381.0 (+1 eps) [worker 3] episodes_seen=2100 last_return=-184.6 (+1 eps) [worker 2] episodes_seen=2080 last_return=-173.3 (+1 eps) [worker 1] episodes_seen=2090 last_return=-103.7 (+1 eps) [worker 0] episodes_seen=2100 last_return=-147.8 (+1 eps) [worker 3] episodes_seen=2110 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=2090 last_return=-294.0 (+1 eps) [worker 1] episodes_seen=2100 last_return=-190.1 (+1 eps) [worker 0] episodes_seen=2110 last_return=-62.3 (+1 eps) [worker 3] episodes_seen=2120 last_return=-154.9 (+1 eps) [worker 2] 
episodes_seen=2100 last_return=-92.3 (+1 eps) [worker 1] episodes_seen=2110 last_return=-436.7 (+1 eps) [worker 0] episodes_seen=2120 last_return=-519.4 (+1 eps) [worker 3] episodes_seen=2130 last_return=-176.4 (+1 eps) [worker 2] episodes_seen=2110 last_return=-379.8 (+1 eps) [worker 1] episodes_seen=2120 last_return=-280.8 (+1 eps) [worker 0] episodes_seen=2130 last_return=-401.0 (+1 eps) [worker 3] episodes_seen=2140 last_return=-139.4 (+1 eps) [worker 2] episodes_seen=2120 last_return=-170.2 (+1 eps) [A2C][sync] it= 6256 steps= 750720 (+120) avg10=-236.09 loss=9070.264 pg=0.028 vf=15117.075 H=0.657 gn=11244.428 [worker 1] episodes_seen=2130 last_return=-133.9 (+1 eps) [worker 0] episodes_seen=2140 last_return=-264.5 (+1 eps) [worker 3] episodes_seen=2150 last_return=-100.5 (+1 eps) [worker 2] episodes_seen=2130 last_return=-131.4 (+1 eps) [worker 1] episodes_seen=2140 last_return=-175.0 (+1 eps) [worker 0] episodes_seen=2150 last_return=-268.8 (+1 eps) [worker 3] episodes_seen=2160 last_return=-324.2 (+1 eps) [worker 2] episodes_seen=2140 last_return=-267.3 (+1 eps) [worker 1] episodes_seen=2150 last_return=-173.5 (+1 eps) [worker 0] episodes_seen=2160 last_return=-291.4 (+1 eps) [worker 3] episodes_seen=2170 last_return=-121.7 (+1 eps) [worker 2] episodes_seen=2150 last_return=-323.3 (+1 eps) [worker 1] episodes_seen=2160 last_return=-198.6 (+1 eps) [worker 0] episodes_seen=2170 last_return=-150.3 (+1 eps) [worker 3] episodes_seen=2180 last_return=-138.1 (+1 eps) [worker 2] episodes_seen=2160 last_return=-201.5 (+1 eps) [worker 1] episodes_seen=2170 last_return=-353.8 (+1 eps) [worker 0] episodes_seen=2180 last_return=-266.5 (+1 eps) [worker 3] episodes_seen=2190 last_return=-297.0 (+1 eps) [worker 2] episodes_seen=2170 last_return=-108.9 (+1 eps) [worker 1] episodes_seen=2180 last_return=-158.7 (+1 eps) [worker 0] episodes_seen=2190 last_return=-124.2 (+1 eps) [worker 3] episodes_seen=2200 last_return=-109.6 (+1 eps) [worker 2] episodes_seen=2180 
last_return=-171.8 (+1 eps) [worker 1] episodes_seen=2190 last_return=-93.0 (+1 eps) [worker 0] episodes_seen=2200 last_return=-97.6 (+1 eps) [worker 3] episodes_seen=2210 last_return=-63.6 (+1 eps) [worker 2] episodes_seen=2190 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2200 last_return=-91.7 (+1 eps) [worker 0] episodes_seen=2210 last_return=-60.2 (+1 eps) [worker 3] episodes_seen=2220 last_return=-77.6 (+1 eps) [worker 2] episodes_seen=2200 last_return=-140.3 (+1 eps) [worker 1] episodes_seen=2210 last_return=-130.9 (+1 eps) [worker 0] episodes_seen=2220 last_return=-96.7 (+1 eps) [worker 3] episodes_seen=2230 last_return=-44.6 (+1 eps) [worker 2] episodes_seen=2210 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2220 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2230 last_return=-135.0 (+1 eps) [worker 3] episodes_seen=2240 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=2220 last_return=-162.6 (+1 eps) [worker 1] episodes_seen=2230 last_return=47.5 (+1 eps) [worker 0] episodes_seen=2240 last_return=-93.5 (+1 eps) [worker 3] episodes_seen=2250 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=2230 last_return=-50.6 (+1 eps) [worker 0] episodes_seen=2250 last_return=-116.4 (+1 eps) [worker 1] episodes_seen=2240 last_return=-96.6 (+1 eps) [worker 3] episodes_seen=2260 last_return=-222.7 (+1 eps) [worker 2] episodes_seen=2240 last_return=-162.0 (+1 eps) [worker 0] episodes_seen=2260 last_return=-103.0 (+1 eps) [worker 1] episodes_seen=2250 last_return=37.6 (+1 eps) [worker 3] episodes_seen=2270 last_return=-59.9 (+1 eps) [worker 2] episodes_seen=2250 last_return=-75.8 (+1 eps) [worker 0] episodes_seen=2270 last_return=-107.0 (+1 eps) [worker 1] episodes_seen=2260 last_return=-39.6 (+1 eps) [worker 3] episodes_seen=2280 last_return=-117.4 (+1 eps) [worker 2] episodes_seen=2260 last_return=-59.9 (+1 eps) [worker 0] episodes_seen=2280 last_return=-62.1 (+1 eps) [worker 3] episodes_seen=2290 last_return=-292.7 (+1 eps) [worker 1] 
episodes_seen=2270 last_return=-161.1 (+1 eps) [worker 2] episodes_seen=2270 last_return=-115.3 (+1 eps) [A2C][sync] it= 6673 steps= 800760 (+120) avg10=-100.91 loss=969.424 pg=0.063 vf=1615.626 H=1.024 gn=1214.356 [worker 0] episodes_seen=2290 last_return=-65.7 (+1 eps) [worker 1] episodes_seen=2280 last_return=-115.2 (+1 eps) [worker 3] episodes_seen=2300 last_return=-38.2 (+1 eps) [worker 2] episodes_seen=2280 last_return=-50.4 (+1 eps) [worker 0] episodes_seen=2300 last_return=-135.1 (+1 eps) [worker 1] episodes_seen=2290 last_return=-97.8 (+1 eps) [worker 3] episodes_seen=2310 last_return=-90.6 (+1 eps) [worker 2] episodes_seen=2290 last_return=-95.8 (+1 eps) [worker 0] episodes_seen=2310 last_return=-94.4 (+1 eps) [worker 3] episodes_seen=2320 last_return=-77.7 (+1 eps) [worker 1] episodes_seen=2300 last_return=-68.2 (+1 eps) [worker 2] episodes_seen=2300 last_return=-67.9 (+1 eps) [worker 0] episodes_seen=2320 last_return=-110.8 (+1 eps) [worker 2] episodes_seen=2310 last_return=-116.5 (+1 eps) [worker 0] episodes_seen=2330 last_return=-102.0 (+1 eps) [worker 3] episodes_seen=2330 last_return=-79.3 (+1 eps) [worker 1] episodes_seen=2310 last_return=-48.8 (+1 eps) [worker 2] episodes_seen=2320 last_return=-84.8 (+1 eps) [worker 0] episodes_seen=2340 last_return=-13.2 (+1 eps) [worker 1] episodes_seen=2320 last_return=-278.5 (+1 eps) [worker 2] episodes_seen=2330 last_return=-120.3 (+1 eps) [worker 3] episodes_seen=2340 last_return=-229.1 (+1 eps) [worker 0] episodes_seen=2350 last_return=-40.2 (+1 eps) [worker 1] episodes_seen=2330 last_return=-62.9 (+1 eps) [worker 3] episodes_seen=2350 last_return=-77.4 (+1 eps) [worker 0] episodes_seen=2360 last_return=-72.1 (+1 eps) [worker 1] episodes_seen=2340 last_return=-84.6 (+1 eps) [worker 2] episodes_seen=2340 last_return=-101.0 (+1 eps) [worker 3] episodes_seen=2360 last_return=-103.4 (+1 eps) [worker 0] episodes_seen=2370 last_return=-78.4 (+1 eps) [worker 2] episodes_seen=2350 last_return=-83.4 (+1 eps) [worker 
1] episodes_seen=2350 last_return=-72.2 (+1 eps) [worker 3] episodes_seen=2370 last_return=-120.1 (+1 eps) [worker 2] episodes_seen=2360 last_return=-102.9 (+1 eps) [worker 0] episodes_seen=2380 last_return=-143.2 (+1 eps) [worker 1] episodes_seen=2360 last_return=-79.6 (+1 eps) [worker 3] episodes_seen=2380 last_return=-56.0 (+1 eps) [worker 2] episodes_seen=2370 last_return=-50.0 (+1 eps) [worker 0] episodes_seen=2390 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2370 last_return=-61.7 (+1 eps) [worker 3] episodes_seen=2390 last_return=-227.3 (+1 eps) [worker 1] episodes_seen=2380 last_return=-195.8 (+1 eps) [worker 2] episodes_seen=2380 last_return=-143.8 (+1 eps) [worker 0] episodes_seen=2400 last_return=-126.2 (+1 eps) [worker 3] episodes_seen=2400 last_return=-138.1 (+1 eps) [worker 2] episodes_seen=2390 last_return=-224.6 (+1 eps) [worker 0] episodes_seen=2410 last_return=-184.0 (+1 eps) [worker 1] episodes_seen=2390 last_return=-113.6 (+1 eps) [worker 3] episodes_seen=2410 last_return=-75.7 (+1 eps) [A2C][sync] it= 7090 steps= 850800 (+120) avg10= -67.66 loss=264.969 pg=-0.126 vf=441.852 H=1.092 gn=2112.550 [worker 0] episodes_seen=2420 last_return=-60.8 (+1 eps) [worker 2] episodes_seen=2400 last_return=-104.1 (+1 eps) [worker 1] episodes_seen=2400 last_return=-40.7 (+1 eps) [worker 3] episodes_seen=2420 last_return=-86.2 (+1 eps) [worker 2] episodes_seen=2410 last_return=-82.3 (+1 eps) [worker 1] episodes_seen=2410 last_return=-63.3 (+1 eps) [worker 0] episodes_seen=2430 last_return=-92.5 (+1 eps) [worker 3] episodes_seen=2430 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=2420 last_return=-257.2 (+1 eps) [worker 0] episodes_seen=2440 last_return=-231.6 (+1 eps) [worker 1] episodes_seen=2420 last_return=-240.0 (+1 eps) [worker 3] episodes_seen=2440 last_return=-108.9 (+1 eps) [worker 2] episodes_seen=2430 last_return=-118.7 (+1 eps) [worker 0] episodes_seen=2450 last_return=-271.5 (+1 eps) [worker 1] episodes_seen=2430 last_return=-309.9 (+1 
eps) [worker 3] episodes_seen=2450 last_return=-87.0 (+1 eps) [worker 2] episodes_seen=2440 last_return=-18.4 (+1 eps) [worker 1] episodes_seen=2440 last_return=-207.4 (+1 eps) [worker 0] episodes_seen=2460 last_return=-116.5 (+1 eps) [worker 3] episodes_seen=2460 last_return=-63.3 (+1 eps) [worker 2] episodes_seen=2450 last_return=-79.6 (+1 eps) [worker 1] episodes_seen=2450 last_return=-49.3 (+1 eps) [worker 0] episodes_seen=2470 last_return=-90.1 (+1 eps) [worker 3] episodes_seen=2470 last_return=-106.9 (+1 eps) [worker 2] episodes_seen=2460 last_return=-88.9 (+1 eps) [worker 1] episodes_seen=2460 last_return=-75.6 (+1 eps) [worker 0] episodes_seen=2480 last_return=-97.0 (+1 eps) [worker 3] episodes_seen=2480 last_return=-87.3 (+1 eps) [worker 2] episodes_seen=2470 last_return=-77.4 (+1 eps) [worker 1] episodes_seen=2470 last_return=-49.9 (+1 eps) [worker 0] episodes_seen=2490 last_return=-51.9 (+1 eps) [worker 3] episodes_seen=2490 last_return=-76.0 (+1 eps) [worker 2] episodes_seen=2480 last_return=-65.1 (+1 eps) [worker 1] episodes_seen=2480 last_return=-83.4 (+1 eps) [worker 0] episodes_seen=2500 last_return=-68.6 (+1 eps) [worker 3] episodes_seen=2500 last_return=-144.1 (+1 eps) [worker 2] episodes_seen=2490 last_return=-85.7 (+1 eps) [worker 1] episodes_seen=2490 last_return=-128.9 (+1 eps) [worker 0] episodes_seen=2510 last_return=-33.4 (+1 eps) [worker 3] episodes_seen=2510 last_return=-205.9 (+1 eps) [worker 2] episodes_seen=2500 last_return=-47.8 (+1 eps) [worker 0] episodes_seen=2520 last_return=-48.7 (+1 eps) [worker 1] episodes_seen=2500 last_return=-59.7 (+1 eps) [worker 3] episodes_seen=2520 last_return=-81.1 (+1 eps) [worker 2] episodes_seen=2510 last_return=-117.2 (+1 eps) [worker 0] episodes_seen=2530 last_return=-72.1 (+1 eps) [A2C][sync] it= 7507 steps= 900840 (+120) avg10= -58.46 loss=134.598 pg=0.068 vf=224.242 H=1.017 gn=660.307 [worker 3] episodes_seen=2530 last_return=-69.1 (+1 eps) [worker 1] episodes_seen=2510 last_return=-39.8 (+1 
eps) [worker 0] episodes_seen=2540 last_return=-73.9 (+1 eps) [worker 3] episodes_seen=2540 last_return=-91.5 (+1 eps) [worker 0] episodes_seen=2550 last_return=-218.4 (+1 eps) [worker 1] episodes_seen=2520 last_return=-300.9 (+1 eps) [worker 2] episodes_seen=2520 last_return=-118.6 (+1 eps) [worker 3] episodes_seen=2550 last_return=-112.4 (+1 eps) [worker 0] episodes_seen=2560 last_return=-100.1 (+1 eps) [worker 2] episodes_seen=2530 last_return=-68.4 (+1 eps) [worker 1] episodes_seen=2530 last_return=-69.6 (+1 eps) [worker 0] episodes_seen=2570 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=2540 last_return=-61.3 (+1 eps) [worker 1] episodes_seen=2540 last_return=-104.5 (+1 eps) [worker 3] episodes_seen=2560 last_return=-66.1 (+1 eps) [worker 0] episodes_seen=2580 last_return=-116.5 (+1 eps) [worker 2] episodes_seen=2550 last_return=-104.1 (+1 eps) [worker 3] episodes_seen=2570 last_return=-59.3 (+1 eps) [worker 1] episodes_seen=2550 last_return=-72.5 (+1 eps) [worker 0] episodes_seen=2590 last_return=-258.0 (+1 eps) [worker 2] episodes_seen=2560 last_return=-296.6 (+1 eps) [worker 1] episodes_seen=2560 last_return=-171.0 (+1 eps) [worker 3] episodes_seen=2580 last_return=-270.7 (+1 eps) [worker 0] episodes_seen=2600 last_return=-225.0 (+1 eps) [worker 2] episodes_seen=2570 last_return=-172.5 (+1 eps) [worker 1] episodes_seen=2570 last_return=-132.9 (+1 eps) [worker 3] episodes_seen=2590 last_return=-137.2 (+1 eps) [worker 0] episodes_seen=2610 last_return=-223.6 (+1 eps) [worker 2] episodes_seen=2580 last_return=-284.1 (+1 eps) [worker 1] episodes_seen=2580 last_return=-301.1 (+1 eps) [worker 0] episodes_seen=2620 last_return=-178.1 (+1 eps) [worker 3] episodes_seen=2600 last_return=-198.1 (+1 eps) [worker 2] episodes_seen=2590 last_return=-143.6 (+1 eps) [worker 1] episodes_seen=2590 last_return=-342.4 (+1 eps) [worker 0] episodes_seen=2630 last_return=-291.4 (+1 eps) [worker 2] episodes_seen=2600 last_return=-136.9 (+1 eps) [worker 3] episodes_seen=2610 
last_return=-181.1 (+1 eps) [worker 1] episodes_seen=2600 last_return=-195.3 (+1 eps) [A2C][sync] it= 7924 steps= 950880 (+120) avg10=-216.54 loss=6131.642 pg=0.021 vf=10219.369 H=0.025 gn=72463.367 [worker 0] episodes_seen=2640 last_return=-236.1 (+1 eps) [worker 3] episodes_seen=2620 last_return=-192.7 (+1 eps) [worker 2] episodes_seen=2610 last_return=-300.7 (+1 eps) [worker 1] episodes_seen=2610 last_return=-138.6 (+1 eps) [worker 0] episodes_seen=2650 last_return=-331.2 (+1 eps) [worker 2] episodes_seen=2620 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2620 last_return=-217.1 (+1 eps) [worker 3] episodes_seen=2630 last_return=-144.7 (+1 eps) [worker 0] episodes_seen=2660 last_return=-256.4 (+1 eps) [worker 2] episodes_seen=2630 last_return=-142.7 (+1 eps) [worker 3] episodes_seen=2640 last_return=-202.3 (+1 eps) [worker 1] episodes_seen=2630 last_return=-218.4 (+1 eps) [worker 0] episodes_seen=2670 last_return=-227.6 (+1 eps) [worker 2] episodes_seen=2640 last_return=-248.2 (+1 eps) [worker 1] episodes_seen=2640 last_return=-192.1 (+1 eps) [worker 3] episodes_seen=2650 last_return=-374.3 (+1 eps) [worker 0] episodes_seen=2680 last_return=-113.3 (+1 eps) [worker 2] episodes_seen=2650 last_return=-104.4 (+1 eps) [worker 1] episodes_seen=2650 last_return=-64.4 (+1 eps) [worker 3] episodes_seen=2660 last_return=-56.3 (+1 eps) [worker 0] episodes_seen=2690 last_return=-56.6 (+1 eps) [worker 2] episodes_seen=2660 last_return=-166.3 (+1 eps) [worker 1] episodes_seen=2660 last_return=-43.4 (+1 eps) [worker 3] episodes_seen=2670 last_return=-84.4 (+1 eps) [worker 0] episodes_seen=2700 last_return=-98.7 (+1 eps) [worker 2] episodes_seen=2670 last_return=-167.6 (+1 eps) [worker 1] episodes_seen=2670 last_return=-122.5 (+1 eps) [worker 3] episodes_seen=2680 last_return=-77.2 (+1 eps) [worker 0] episodes_seen=2710 last_return=-29.8 (+1 eps) [worker 2] episodes_seen=2680 last_return=-137.5 (+1 eps) [worker 1] episodes_seen=2680 last_return=-223.4 (+1 eps) [worker 3] 
episodes_seen=2690 last_return=-59.7 (+1 eps) [worker 0] episodes_seen=2720 last_return=-111.1 (+1 eps) [worker 2] episodes_seen=2690 last_return=-75.1 (+1 eps) [worker 3] episodes_seen=2700 last_return=-47.2 (+1 eps) [worker 1] episodes_seen=2690 last_return=-172.4 (+1 eps) [worker 0] episodes_seen=2730 last_return=-120.9 (+1 eps) [worker 1] episodes_seen=2700 last_return=-110.8 (+1 eps) [worker 3] episodes_seen=2710 last_return=-83.1 (+1 eps) [worker 0] episodes_seen=2740 last_return=-79.7 (+1 eps) [worker 2] episodes_seen=2700 last_return=-100.8 (+1 eps) [worker 1] episodes_seen=2710 last_return=-102.7 (+1 eps) [worker 3] episodes_seen=2720 last_return=-72.7 (+1 eps) [worker 0] episodes_seen=2750 last_return=-115.0 (+1 eps) [worker 1] episodes_seen=2720 last_return=-79.0 (+1 eps) [worker 3] episodes_seen=2730 last_return=-62.3 (+1 eps) [worker 0] episodes_seen=2760 last_return=-304.9 (+1 eps) [A2C][sync] it= 8341 steps= 1000920 (+120) avg10=-159.77 loss=4995.396 pg=-0.116 vf=8325.874 H=0.813 gn=16604.949 [worker 2] episodes_seen=2710 last_return=-272.9 (+1 eps) [worker 1] episodes_seen=2730 last_return=-98.8 (+1 eps) [worker 3] episodes_seen=2740 last_return=-58.4 (+1 eps) [worker 0] episodes_seen=2770 last_return=-142.0 (+1 eps) [worker 2] episodes_seen=2720 last_return=-84.0 (+1 eps) [worker 1] episodes_seen=2740 last_return=-254.0 (+1 eps) [worker 3] episodes_seen=2750 last_return=-93.9 (+1 eps) [worker 2] episodes_seen=2730 last_return=-103.2 (+1 eps) [worker 1] episodes_seen=2750 last_return=-87.2 (+1 eps) [worker 3] episodes_seen=2760 last_return=31.2 (+1 eps) [worker 0] episodes_seen=2780 last_return=-51.4 (+1 eps) [worker 2] episodes_seen=2740 last_return=-347.4 (+1 eps) [worker 1] episodes_seen=2760 last_return=-226.1 (+1 eps) [worker 3] episodes_seen=2770 last_return=-203.7 (+1 eps) [worker 0] episodes_seen=2790 last_return=-279.7 (+1 eps) [worker 2] episodes_seen=2750 last_return=-199.4 (+1 eps) [worker 1] episodes_seen=2770 last_return=-100.0 (+1 
eps) [worker 0] episodes_seen=2800 last_return=-73.4 (+1 eps) [worker 3] episodes_seen=2780 last_return=-84.3 (+1 eps) [worker 2] episodes_seen=2760 last_return=-120.5 (+1 eps) [worker 0] episodes_seen=2810 last_return=-228.5 (+1 eps) [worker 1] episodes_seen=2780 last_return=-108.7 (+1 eps) [worker 2] episodes_seen=2770 last_return=-202.1 (+1 eps) [worker 3] episodes_seen=2790 last_return=-131.2 (+1 eps) [worker 0] episodes_seen=2820 last_return=-114.3 (+1 eps) [worker 2] episodes_seen=2780 last_return=-56.4 (+1 eps) [worker 3] episodes_seen=2800 last_return=-86.7 (+1 eps) [worker 1] episodes_seen=2790 last_return=-47.2 (+1 eps) [worker 0] episodes_seen=2830 last_return=-86.6 (+1 eps) [worker 2] episodes_seen=2790 last_return=-64.4 (+1 eps) [worker 1] episodes_seen=2800 last_return=-86.1 (+1 eps) [worker 3] episodes_seen=2810 last_return=-68.7 (+1 eps) [worker 2] episodes_seen=2800 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2810 last_return=-134.6 (+1 eps) [worker 0] episodes_seen=2840 last_return=-152.6 (+1 eps) [A2C][sync] it= 8758 steps= 1050960 (+120) avg10= -81.98 loss=142.953 pg=-0.059 vf=238.378 H=0.974 gn=748.744 [worker 1] episodes_seen=2820 last_return=-74.4 (+1 eps) [worker 0] episodes_seen=2850 last_return=-76.2 (+1 eps) [worker 2] episodes_seen=2810 last_return=-99.3 (+1 eps) [worker 3] episodes_seen=2820 last_return=-87.9 (+1 eps) [worker 2] episodes_seen=2820 last_return=-86.2 (+1 eps) [worker 3] episodes_seen=2830 last_return=-81.2 (+1 eps) [worker 1] episodes_seen=2830 last_return=-111.8 (+1 eps) [worker 0] episodes_seen=2860 last_return=-99.0 (+1 eps) [worker 3] episodes_seen=2840 last_return=-119.2 (+1 eps) [worker 1] episodes_seen=2840 last_return=-84.0 (+1 eps) [worker 0] episodes_seen=2870 last_return=-110.0 (+1 eps) [worker 2] episodes_seen=2830 last_return=-83.9 (+1 eps) [worker 1] episodes_seen=2850 last_return=-91.5 (+1 eps) [worker 3] episodes_seen=2850 last_return=-82.6 (+1 eps) [worker 2] episodes_seen=2840 last_return=-60.7 
(+1 eps) [worker 0] episodes_seen=2880 last_return=-90.9 (+1 eps) [worker 1] episodes_seen=2860 last_return=-19.3 (+1 eps) [worker 3] episodes_seen=2860 last_return=-45.3 (+1 eps) [worker 0] episodes_seen=2890 last_return=-203.9 (+1 eps) [worker 2] episodes_seen=2850 last_return=-91.8 (+1 eps) [worker 3] episodes_seen=2870 last_return=-142.3 (+1 eps) [worker 1] episodes_seen=2870 last_return=-67.9 (+1 eps) [worker 0] episodes_seen=2900 last_return=-110.8 (+1 eps) [worker 2] episodes_seen=2860 last_return=-121.6 (+1 eps) [worker 3] episodes_seen=2880 last_return=-69.6 (+1 eps) [worker 1] episodes_seen=2880 last_return=-19.1 (+1 eps) [worker 0] episodes_seen=2910 last_return=-24.7 (+1 eps) [worker 2] episodes_seen=2870 last_return=-67.2 (+1 eps) [A2C][sync] it= 9175 steps= 1101000 (+120) avg10= -72.24 loss=51.853 pg=-0.102 vf=86.615 H=0.930 gn=648.315 [worker 3] episodes_seen=2890 last_return=-78.8 (+1 eps) [worker 1] episodes_seen=2890 last_return=-109.5 (+1 eps) [worker 2] episodes_seen=2880 last_return=-102.4 (+1 eps) [worker 1] episodes_seen=2900 last_return=-110.4 (+1 eps) [worker 3] episodes_seen=2900 last_return=-84.2 (+1 eps) [worker 0] episodes_seen=2920 last_return=-128.9 (+1 eps) [worker 2] episodes_seen=2890 last_return=-90.5 (+1 eps) [worker 1] episodes_seen=2910 last_return=-147.3 (+1 eps) [worker 3] episodes_seen=2910 last_return=-268.5 (+1 eps) [worker 0] episodes_seen=2930 last_return=-221.6 (+1 eps) [worker 2] episodes_seen=2900 last_return=-217.3 (+1 eps) [worker 3] episodes_seen=2920 last_return=-104.5 (+1 eps) [worker 0] episodes_seen=2940 last_return=-77.9 (+1 eps) [worker 1] episodes_seen=2920 last_return=-105.4 (+1 eps) [worker 2] episodes_seen=2910 last_return=-53.3 (+1 eps) [worker 1] episodes_seen=2930 last_return=-111.0 (+1 eps) [worker 2] episodes_seen=2920 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2950 last_return=-69.0 (+1 eps) [worker 3] episodes_seen=2930 last_return=-66.2 (+1 eps) [worker 2] episodes_seen=2930 
last_return=-128.9 (+1 eps) [worker 0] episodes_seen=2960 last_return=-144.5 (+1 eps) [worker 3] episodes_seen=2940 last_return=-146.3 (+1 eps) [worker 1] episodes_seen=2940 last_return=-176.0 (+1 eps) [worker 2] episodes_seen=2940 last_return=-58.5 (+1 eps) [worker 1] episodes_seen=2950 last_return=-52.7 (+1 eps) [worker 3] episodes_seen=2950 last_return=-107.5 (+1 eps) [worker 0] episodes_seen=2970 last_return=-84.3 (+1 eps) [worker 1] episodes_seen=2960 last_return=-73.1 (+1 eps) [A2C][sync] it= 9592 steps= 1151040 (+120) avg10= -65.94 loss=197.939 pg=-0.104 vf=330.098 H=1.040 gn=1446.842 [worker 2] episodes_seen=2950 last_return=-114.5 (+1 eps) [worker 3] episodes_seen=2960 last_return=-47.0 (+1 eps) [worker 1] episodes_seen=2970 last_return=-85.8 (+1 eps) [worker 0] episodes_seen=2980 last_return=-88.1 (+1 eps) [worker 2] episodes_seen=2960 last_return=-50.2 (+1 eps) [worker 3] episodes_seen=2970 last_return=-24.5 (+1 eps) [worker 1] episodes_seen=2980 last_return=-71.3 (+1 eps) [worker 0] episodes_seen=2990 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=2970 last_return=-123.3 (+1 eps) [worker 3] episodes_seen=2980 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=2980 last_return=-75.5 (+1 eps) [worker 3] episodes_seen=2990 last_return=-130.0 (+1 eps) [worker 0] episodes_seen=3000 last_return=-56.1 (+1 eps) [worker 1] episodes_seen=2990 last_return=-86.6 (+1 eps) [worker 2] episodes_seen=2990 last_return=-60.8 (+1 eps) [worker 1] episodes_seen=3000 last_return=-116.2 (+1 eps) [worker 3] episodes_seen=3000 last_return=-94.9 (+1 eps) [worker 0] episodes_seen=3010 last_return=-118.3 (+1 eps) [worker 2] episodes_seen=3000 last_return=-76.0 (+1 eps) [worker 3] episodes_seen=3010 last_return=-110.7 (+1 eps) [worker 1] episodes_seen=3010 last_return=-53.4 (+1 eps) [worker 0] episodes_seen=3020 last_return=-101.8 (+1 eps) [worker 2] episodes_seen=3010 last_return=-87.4 (+1 eps) [A2C][sync] it=10009 steps= 1201080 (+120) avg10= -69.25 loss=168.014 pg=-0.051 
vf=280.135 H=1.020 gn=914.542 [worker 3] episodes_seen=3020 last_return=-67.6 (+1 eps) [worker 2] episodes_seen=3020 last_return=-64.6 (+1 eps) [worker 0] episodes_seen=3030 last_return=1.4 (+1 eps) [worker 1] episodes_seen=3020 last_return=-72.9 (+1 eps) [worker 2] episodes_seen=3030 last_return=-64.2 (+1 eps) [worker 3] episodes_seen=3030 last_return=-115.4 (+1 eps) [worker 0] episodes_seen=3040 last_return=-61.4 (+1 eps) [worker 3] episodes_seen=3040 last_return=-108.3 (+1 eps) [worker 2] episodes_seen=3040 last_return=-95.9 (+1 eps) [worker 1] episodes_seen=3030 last_return=-69.2 (+1 eps) [worker 2] episodes_seen=3050 last_return=-57.6 (+1 eps) [worker 0] episodes_seen=3050 last_return=-46.8 (+1 eps) [worker 1] episodes_seen=3040 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=3050 last_return=-119.6 (+1 eps) [worker 2] episodes_seen=3060 last_return=-85.1 (+1 eps) [worker 0] episodes_seen=3060 last_return=-73.7 (+1 eps) [worker 3] episodes_seen=3060 last_return=-90.0 (+1 eps) [worker 2] episodes_seen=3070 last_return=-101.9 (+1 eps) [worker 1] episodes_seen=3050 last_return=-73.8 (+1 eps) [worker 0] episodes_seen=3070 last_return=-67.5 (+1 eps) [A2C][sync] it=10426 steps= 1251120 (+120) avg10= -67.95 loss=43.056 pg=-0.107 vf=71.962 H=0.944 gn=909.725 [worker 3] episodes_seen=3070 last_return=-63.5 (+1 eps) [worker 2] episodes_seen=3080 last_return=-67.5 (+1 eps) [worker 1] episodes_seen=3060 last_return=-46.1 (+1 eps) [worker 0] episodes_seen=3080 last_return=-104.3 (+1 eps) [worker 2] episodes_seen=3090 last_return=-135.7 (+1 eps) [worker 3] episodes_seen=3080 last_return=-114.5 (+1 eps) [worker 1] episodes_seen=3070 last_return=-76.5 (+1 eps) [worker 0] episodes_seen=3090 last_return=-82.2 (+1 eps) [worker 2] episodes_seen=3100 last_return=-89.0 (+1 eps) [worker 3] episodes_seen=3090 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=3080 last_return=-84.8 (+1 eps) [worker 0] episodes_seen=3100 last_return=-67.6 (+1 eps) [worker 2] episodes_seen=3110 
last_return=-106.0 (+1 eps) [worker 3] episodes_seen=3100 last_return=-66.2 (+1 eps) [worker 1] episodes_seen=3090 last_return=-86.2 (+1 eps) [worker 0] episodes_seen=3110 last_return=-75.6 (+1 eps) [worker 3] episodes_seen=3110 last_return=-43.4 (+1 eps) [worker 2] episodes_seen=3120 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=3100 last_return=-173.0 (+1 eps) [A2C][sync] it=10843 steps= 1301160 (+120) avg10=-101.87 loss=34.342 pg=0.037 vf=57.202 H=1.078 gn=205.748 [worker 3] episodes_seen=3120 last_return=-105.3 (+1 eps) [worker 0] episodes_seen=3120 last_return=-83.0 (+1 eps) [worker 1] episodes_seen=3110 last_return=-86.6 (+1 eps) [worker 2] episodes_seen=3130 last_return=-85.6 (+1 eps) [worker 3] episodes_seen=3130 last_return=-61.8 (+1 eps) [worker 0] episodes_seen=3130 last_return=-85.7 (+1 eps) [worker 2] episodes_seen=3140 last_return=-10.5 (+1 eps) [worker 1] episodes_seen=3120 last_return=-94.2 (+1 eps) [worker 3] episodes_seen=3140 last_return=-78.3 (+1 eps) [worker 0] episodes_seen=3140 last_return=-118.0 (+1 eps) [worker 2] episodes_seen=3150 last_return=-51.4 (+1 eps) [worker 1] episodes_seen=3130 last_return=-74.2 (+1 eps) [worker 0] episodes_seen=3150 last_return=-69.0 (+1 eps) [worker 3] episodes_seen=3150 last_return=-64.7 (+1 eps) [worker 2] episodes_seen=3160 last_return=-92.5 (+1 eps) [worker 1] episodes_seen=3140 last_return=-75.0 (+1 eps) [worker 0] episodes_seen=3160 last_return=-49.4 (+1 eps) [worker 3] episodes_seen=3160 last_return=-73.9 (+1 eps) [worker 2] episodes_seen=3170 last_return=-1.2 (+1 eps) [worker 1] episodes_seen=3150 last_return=-87.8 (+1 eps) [worker 0] episodes_seen=3170 last_return=-110.8 (+1 eps) [worker 3] episodes_seen=3170 last_return=-94.8 (+1 eps) [worker 2] episodes_seen=3180 last_return=-85.9 (+1 eps) [A2C][sync] it=11260 steps= 1351200 (+120) avg10= -84.04 loss=158.965 pg=-0.107 vf=265.140 H=0.781 gn=1451.315 [worker 1] episodes_seen=3160 last_return=-90.8 (+1 eps) [worker 0] episodes_seen=3180 
last_return=-93.4 (+1 eps) [worker 3] episodes_seen=3180 last_return=-90.8 (+1 eps) [worker 2] episodes_seen=3190 last_return=-89.9 (+1 eps) [worker 1] episodes_seen=3170 last_return=-118.6 (+1 eps) [worker 0] episodes_seen=3190 last_return=-133.6 (+1 eps) [worker 3] episodes_seen=3190 last_return=-115.4 (+1 eps) [worker 2] episodes_seen=3200 last_return=-87.4 (+1 eps) [worker 1] episodes_seen=3180 last_return=-83.8 (+1 eps) [worker 3] episodes_seen=3200 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=3200 last_return=-1.8 (+1 eps) [worker 2] episodes_seen=3210 last_return=-92.1 (+1 eps) [worker 1] episodes_seen=3190 last_return=-66.0 (+1 eps) [worker 0] episodes_seen=3210 last_return=-83.4 (+1 eps) [worker 2] episodes_seen=3220 last_return=-86.7 (+1 eps) [worker 3] episodes_seen=3210 last_return=-308.1 (+1 eps) [worker 1] episodes_seen=3200 last_return=-233.7 (+1 eps) [worker 0] episodes_seen=3220 last_return=-59.5 (+1 eps) [worker 2] episodes_seen=3230 last_return=-119.7 (+1 eps) [worker 1] episodes_seen=3210 last_return=-98.7 (+1 eps) [A2C][sync] it=11667 steps= 1400040 (+120) avg10= -87.85 loss=231.437 pg=0.125 vf=385.545 H=0.982 gn=1206.631 [Checkpoint] Saved self-describing checkpoint → part2_artifacts/checkpoints/a2c_run10_seed1227.pth [A2C][sync] done: steps=1400040 time=1319.1s avg10=-87.85
[Run run10_seed1227] checkpoint: part2_artifacts/checkpoints/a2c_run10_seed1227.pth [Run run10_seed1227] training plot (tail 500): part2_artifacts/train_curve_run10_seed1227.png [Run run10_seed1227] training plot (full): part2_artifacts/train_curve_full_run10_seed1227.png [Run run10_seed1227] per-worker plot: part2_artifacts/train_curve_workers_run10_seed1227.png [Run run10_seed1227] workers-average plot: part2_artifacts/train_curve_workers_avg_run10_seed1227.png
[Eval run10_seed1227] mean=-164.21 std=26.76 min=-202.09 max=-100.87 [Eval run10_seed1227] CSV: part2_artifacts/eval10_run10_seed1227.csv [Eval run10_seed1227] plot: part2_artifacts/eval10_run10_seed1227.png [Best] ep=2 return=-100.87 seed=1229
/usr/local/lib/python3.12/dist-packages/gymnasium/wrappers/rendering.py:293: UserWarning: WARN: Overwriting existing videos at /content/part2_artifacts/videos/run10_seed1227 folder (try specifying a different `video_folder` for the `RecordVideo` wrapper if this is not desired)
logger.warn(
[Video run10_seed1227] episode return=-100.87 [Video run10_seed1227] saved under: part2_artifacts/videos run10_seed1227 | mean=-164.2±26.8 | best_ep=2, best_ret=-100.9
Run #11 — 2 workers, 600k env steps, lr=3e-4
# Run 11: train with two workers for 600k environment steps, then evaluate
# the saved checkpoint and record a video from it.
run_id = f"run11_seed{SEED}"

# All tunables for this run live in one mapping and are forwarded to
# train_once as keyword arguments, so the configuration reads as a table.
run11_kwargs = dict(
    n_workers=2,
    total_env_steps=600_000,
    T=10,
    gamma=0.99,
    entropy_coef=0.02,
    value_coef=0.55,
    max_grad_norm=0.5,
    lr=3e-4,
    log_every=50_000,
)
model, logs, paths = train_once(run_id=run_id, **run11_kwargs)

# Evaluate the checkpoint written by training; the second return value of
# evaluate_10 is not needed here.
metrics, _ = evaluate_10(run_id, paths.ckpt_path)

# NOTE(review): presumably re-plays the best evaluation episode for the
# video — confirm against record_best_from_eval's definition.
video_dir, best_idx, best_ret = record_best_from_eval(run_id, paths.ckpt_path)

# One-line summary: evaluation mean ± std plus the best episode index/return.
print(f"{run_id} | mean={metrics['mean']:.1f}±{metrics['std']:.1f} | best_ep={best_idx}, best_ret={best_ret:.1f}")
[Run run11_seed1227] starting training… [A2C][sync] start: workers=2, T=10, target_steps=600000, mp=fork [A2C][sync] it= 1 steps= 20 (+ 20) avg10= nan loss=32.959 pg=0.000 vf=59.976 H=1.386 gn=6.577 [worker 0] episodes_seen=10 last_return=-165.6 (+1 eps) [worker 1] episodes_seen=10 last_return=-166.8 (+1 eps) [worker 0] episodes_seen=20 last_return=-114.7 (+1 eps) [worker 1] episodes_seen=20 last_return=-167.2 (+1 eps) [worker 1] episodes_seen=30 last_return=-155.0 (+1 eps) [worker 0] episodes_seen=30 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=40 last_return=-128.6 (+1 eps) [worker 1] episodes_seen=40 last_return=-138.9 (+1 eps) [worker 0] episodes_seen=50 last_return=-129.3 (+1 eps) [worker 1] episodes_seen=50 last_return=-113.0 (+1 eps) [worker 0] episodes_seen=60 last_return=-160.0 (+1 eps) [worker 1] episodes_seen=60 last_return=-136.8 (+1 eps) [worker 0] episodes_seen=70 last_return=-111.0 (+1 eps) [worker 1] episodes_seen=70 last_return=-149.2 (+1 eps) [worker 0] episodes_seen=80 last_return=-152.2 (+1 eps) [worker 1] episodes_seen=80 last_return=-139.6 (+1 eps) [worker 0] episodes_seen=90 last_return=-173.0 (+1 eps) [worker 1] episodes_seen=90 last_return=-143.1 (+1 eps) [worker 0] episodes_seen=100 last_return=-132.8 (+1 eps) [worker 1] episodes_seen=100 last_return=-157.1 (+1 eps) [worker 0] episodes_seen=110 last_return=-136.8 (+1 eps) [worker 1] episodes_seen=110 last_return=-194.5 (+1 eps) [worker 0] episodes_seen=120 last_return=-130.9 (+1 eps) [worker 1] episodes_seen=120 last_return=-136.5 (+1 eps) [worker 0] episodes_seen=130 last_return=-115.7 (+1 eps) [worker 1] episodes_seen=130 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=140 last_return=-142.7 (+1 eps) [worker 0] episodes_seen=150 last_return=-136.3 (+1 eps) [worker 1] episodes_seen=140 last_return=-205.4 (+1 eps) [worker 0] episodes_seen=160 last_return=-130.5 (+1 eps) [worker 1] episodes_seen=150 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=170 last_return=-153.6 (+1 
eps) [worker 1] episodes_seen=160 last_return=-118.6 (+1 eps) [worker 0] episodes_seen=180 last_return=-127.4 (+1 eps) [worker 1] episodes_seen=170 last_return=-140.5 (+1 eps) [worker 1] episodes_seen=180 last_return=-193.0 (+1 eps) [worker 0] episodes_seen=190 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=200 last_return=-138.4 (+1 eps) [worker 1] episodes_seen=190 last_return=-155.0 (+1 eps) [worker 0] episodes_seen=210 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=200 last_return=-121.8 (+1 eps) [A2C][sync] it= 2501 steps= 50020 (+ 20) avg10=-136.16 loss=350.483 pg=-0.000 vf=637.241 H=0.000 gn=34549.855 [worker 0] episodes_seen=220 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=210 last_return=-155.6 (+1 eps) [worker 0] episodes_seen=230 last_return=-121.9 (+1 eps) [worker 1] episodes_seen=220 last_return=-195.2 (+1 eps) [worker 0] episodes_seen=240 last_return=-181.9 (+1 eps) [worker 1] episodes_seen=230 last_return=-187.6 (+1 eps) [worker 0] episodes_seen=250 last_return=-120.6 (+1 eps) [worker 1] episodes_seen=240 last_return=-164.2 (+1 eps) [worker 0] episodes_seen=260 last_return=-205.0 (+1 eps) [worker 1] episodes_seen=250 last_return=-148.9 (+1 eps) [worker 0] episodes_seen=270 last_return=-189.1 (+1 eps) [worker 1] episodes_seen=260 last_return=-195.5 (+1 eps) [worker 0] episodes_seen=280 last_return=-106.4 (+1 eps) [worker 1] episodes_seen=270 last_return=-114.7 (+1 eps) [worker 0] episodes_seen=290 last_return=-186.0 (+1 eps) [worker 1] episodes_seen=280 last_return=-107.9 (+1 eps) [worker 0] episodes_seen=300 last_return=-172.3 (+1 eps) [worker 1] episodes_seen=290 last_return=-144.8 (+1 eps) [worker 0] episodes_seen=310 last_return=-128.2 (+1 eps) [worker 1] episodes_seen=300 last_return=-124.8 (+1 eps) [worker 0] episodes_seen=320 last_return=-174.7 (+1 eps) [worker 1] episodes_seen=310 last_return=-167.5 (+1 eps) [worker 0] episodes_seen=330 last_return=-147.1 (+1 eps) [worker 1] episodes_seen=320 last_return=-199.1 (+1 eps) 
[worker 0] episodes_seen=340 last_return=-127.4 (+1 eps) [worker 1] episodes_seen=330 last_return=-159.6 (+1 eps) [worker 0] episodes_seen=350 last_return=-142.4 (+1 eps) [worker 1] episodes_seen=340 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=360 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=350 last_return=-138.7 (+1 eps) [worker 0] episodes_seen=370 last_return=-190.2 (+1 eps) [worker 1] episodes_seen=360 last_return=-118.5 (+1 eps) [worker 0] episodes_seen=380 last_return=-83.8 (+1 eps) [worker 1] episodes_seen=370 last_return=-108.1 (+1 eps) [worker 0] episodes_seen=390 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=380 last_return=-212.4 (+1 eps) [worker 0] episodes_seen=400 last_return=-117.8 (+1 eps) [worker 1] episodes_seen=390 last_return=-189.6 (+1 eps) [A2C][sync] it= 5001 steps= 100020 (+ 20) avg10=-130.20 loss=729.729 pg=0.449 vf=1325.987 H=0.635 gn=29251.730 [worker 0] episodes_seen=410 last_return=-91.6 (+1 eps) [worker 1] episodes_seen=400 last_return=-94.5 (+1 eps) [worker 0] episodes_seen=420 last_return=-128.9 (+1 eps) [worker 1] episodes_seen=410 last_return=-99.0 (+1 eps) [worker 0] episodes_seen=430 last_return=-91.5 (+1 eps) [worker 1] episodes_seen=420 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=440 last_return=-130.2 (+1 eps) [worker 1] episodes_seen=430 last_return=-66.7 (+1 eps) [worker 0] episodes_seen=450 last_return=-145.4 (+1 eps) [worker 1] episodes_seen=440 last_return=-98.2 (+1 eps) [worker 0] episodes_seen=460 last_return=-84.5 (+1 eps) [worker 1] episodes_seen=450 last_return=-95.4 (+1 eps) [worker 0] episodes_seen=470 last_return=-117.2 (+1 eps) [worker 1] episodes_seen=460 last_return=-90.0 (+1 eps) [worker 0] episodes_seen=480 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=470 last_return=-94.1 (+1 eps) [worker 1] episodes_seen=480 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=490 last_return=-100.8 (+1 eps) [worker 1] episodes_seen=490 last_return=-185.8 (+1 eps) [worker 0] 
episodes_seen=500 last_return=-105.8 (+1 eps) [worker 0] episodes_seen=510 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=500 last_return=-85.6 (+1 eps) [worker 0] episodes_seen=520 last_return=-182.5 (+1 eps) [worker 1] episodes_seen=510 last_return=-97.0 (+1 eps) [worker 0] episodes_seen=530 last_return=-77.9 (+1 eps) [worker 1] episodes_seen=520 last_return=-138.6 (+1 eps) [worker 0] episodes_seen=540 last_return=-113.1 (+1 eps) [worker 1] episodes_seen=530 last_return=-148.8 (+1 eps) [worker 0] episodes_seen=550 last_return=-121.7 (+1 eps) [worker 1] episodes_seen=540 last_return=-88.7 (+1 eps) [worker 0] episodes_seen=560 last_return=-81.4 (+1 eps) [worker 1] episodes_seen=550 last_return=-96.1 (+1 eps) [worker 0] episodes_seen=570 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=560 last_return=-103.6 (+1 eps) [worker 0] episodes_seen=580 last_return=-68.9 (+1 eps) [worker 1] episodes_seen=570 last_return=-98.4 (+1 eps) [worker 0] episodes_seen=590 last_return=-80.3 (+1 eps) [worker 1] episodes_seen=580 last_return=-113.3 (+1 eps) [A2C][sync] it= 7501 steps= 150020 (+ 20) avg10=-103.81 loss=65.128 pg=0.013 vf=118.430 H=1.040 gn=231.640 [worker 0] episodes_seen=600 last_return=-81.0 (+1 eps) [worker 1] episodes_seen=590 last_return=-135.1 (+1 eps) [worker 0] episodes_seen=610 last_return=-66.1 (+1 eps) [worker 1] episodes_seen=600 last_return=-79.3 (+1 eps) [worker 0] episodes_seen=620 last_return=-114.7 (+1 eps) [worker 1] episodes_seen=610 last_return=-87.3 (+1 eps) [worker 0] episodes_seen=630 last_return=-59.8 (+1 eps) [worker 1] episodes_seen=620 last_return=13.3 (+1 eps) [worker 0] episodes_seen=640 last_return=-104.1 (+1 eps) [worker 1] episodes_seen=630 last_return=-117.8 (+1 eps) [worker 0] episodes_seen=650 last_return=100.0 (+1 eps) [worker 1] episodes_seen=640 last_return=-78.0 (+1 eps) [worker 0] episodes_seen=660 last_return=-95.1 (+1 eps) [worker 1] episodes_seen=650 last_return=-102.0 (+1 eps) [worker 1] episodes_seen=660 
last_return=-97.0 (+1 eps) [worker 0] episodes_seen=670 last_return=-94.2 (+1 eps) [worker 0] episodes_seen=680 last_return=-109.7 (+1 eps) [worker 1] episodes_seen=670 last_return=-56.0 (+1 eps) [worker 0] episodes_seen=690 last_return=-103.2 (+1 eps) [worker 1] episodes_seen=680 last_return=-92.7 (+1 eps) [worker 0] episodes_seen=700 last_return=-113.2 (+1 eps) [A2C][sync] it=10001 steps= 200020 (+ 20) avg10= -98.35 loss=77.032 pg=-0.208 vf=140.446 H=0.264 gn=314.291 [worker 1] episodes_seen=690 last_return=-124.1 (+1 eps) [worker 0] episodes_seen=710 last_return=-68.1 (+1 eps) [worker 1] episodes_seen=700 last_return=-97.1 (+1 eps) [worker 0] episodes_seen=720 last_return=-97.1 (+1 eps) [worker 1] episodes_seen=710 last_return=-69.4 (+1 eps) [worker 0] episodes_seen=730 last_return=-106.2 (+1 eps) [worker 1] episodes_seen=720 last_return=-84.1 (+1 eps) [worker 0] episodes_seen=740 last_return=-96.7 (+1 eps) [worker 1] episodes_seen=730 last_return=-86.1 (+1 eps) [worker 0] episodes_seen=750 last_return=-96.3 (+1 eps) [worker 1] episodes_seen=740 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=760 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=750 last_return=-99.9 (+1 eps) [worker 0] episodes_seen=770 last_return=-64.0 (+1 eps) [worker 1] episodes_seen=760 last_return=-58.1 (+1 eps) [worker 0] episodes_seen=780 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=770 last_return=-91.8 (+1 eps) [worker 0] episodes_seen=790 last_return=-96.2 (+1 eps) [worker 1] episodes_seen=780 last_return=-72.9 (+1 eps) [worker 0] episodes_seen=800 last_return=-86.7 (+1 eps) [worker 1] episodes_seen=790 last_return=-43.1 (+1 eps) [worker 0] episodes_seen=810 last_return=-95.3 (+1 eps) [worker 1] episodes_seen=800 last_return=-101.0 (+1 eps) [worker 0] episodes_seen=820 last_return=-62.3 (+1 eps) [worker 1] episodes_seen=810 last_return=-79.1 (+1 eps) [worker 0] episodes_seen=830 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=820 last_return=-79.9 (+1 eps) 
[worker 0] episodes_seen=840 last_return=-78.8 (+1 eps) [worker 1] episodes_seen=830 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=850 last_return=-67.1 (+1 eps) [worker 1] episodes_seen=840 last_return=-67.0 (+1 eps) [worker 0] episodes_seen=860 last_return=-91.7 (+1 eps) [worker 1] episodes_seen=850 last_return=-78.2 (+1 eps) [worker 0] episodes_seen=870 last_return=-141.4 (+1 eps) [worker 1] episodes_seen=860 last_return=-62.6 (+1 eps) [worker 0] episodes_seen=880 last_return=-92.8 (+1 eps) [worker 1] episodes_seen=870 last_return=-85.0 (+1 eps) [worker 0] episodes_seen=890 last_return=-185.6 (+1 eps) [worker 1] episodes_seen=880 last_return=-102.1 (+1 eps) [worker 0] episodes_seen=900 last_return=-98.9 (+1 eps) [worker 1] episodes_seen=890 last_return=-125.5 (+1 eps) [worker 0] episodes_seen=910 last_return=-129.0 (+1 eps) [worker 1] episodes_seen=900 last_return=-85.1 (+1 eps) [worker 0] episodes_seen=920 last_return=-82.0 (+1 eps) [worker 1] episodes_seen=910 last_return=-82.0 (+1 eps) [worker 0] episodes_seen=930 last_return=-89.6 (+1 eps) [worker 1] episodes_seen=920 last_return=-93.9 (+1 eps) [worker 0] episodes_seen=940 last_return=-148.1 (+1 eps) [worker 1] episodes_seen=930 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=950 last_return=-80.5 (+1 eps) [worker 1] episodes_seen=940 last_return=-90.4 (+1 eps) [worker 0] episodes_seen=960 last_return=-80.5 (+1 eps) [worker 1] episodes_seen=950 last_return=-97.4 (+1 eps) [worker 0] episodes_seen=970 last_return=-74.1 (+1 eps) [worker 1] episodes_seen=960 last_return=-82.3 (+1 eps) [worker 0] episodes_seen=980 last_return=-69.4 (+1 eps) [worker 1] episodes_seen=970 last_return=-94.6 (+1 eps) [worker 0] episodes_seen=990 last_return=-107.3 (+1 eps) [worker 1] episodes_seen=980 last_return=-77.0 (+1 eps) [worker 0] episodes_seen=1000 last_return=-83.2 (+1 eps) [worker 1] episodes_seen=990 last_return=-119.6 (+1 eps) [worker 0] episodes_seen=1010 last_return=-102.6 (+1 eps) [A2C][sync] it=12501 
steps= 250020 (+ 20) avg10=-100.53 loss=1.008 pg=0.260 vf=1.392 H=0.881 gn=23.853 [worker 1] episodes_seen=1000 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1020 last_return=-76.7 (+1 eps) [worker 0] episodes_seen=1030 last_return=-33.9 (+1 eps) [worker 1] episodes_seen=1010 last_return=-139.3 (+1 eps) [worker 0] episodes_seen=1040 last_return=-95.6 (+1 eps) [worker 1] episodes_seen=1020 last_return=-103.4 (+1 eps) [worker 0] episodes_seen=1050 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1030 last_return=-104.4 (+1 eps) [worker 0] episodes_seen=1060 last_return=-98.3 (+1 eps) [worker 1] episodes_seen=1040 last_return=-95.5 (+1 eps) [worker 0] episodes_seen=1070 last_return=-107.0 (+1 eps) [worker 1] episodes_seen=1050 last_return=-118.2 (+1 eps) [worker 0] episodes_seen=1080 last_return=-66.7 (+1 eps) [worker 1] episodes_seen=1060 last_return=-64.1 (+1 eps) [worker 0] episodes_seen=1090 last_return=-95.5 (+1 eps) [worker 0] episodes_seen=1100 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1070 last_return=-102.1 (+1 eps) [worker 0] episodes_seen=1110 last_return=-96.3 (+1 eps) [worker 1] episodes_seen=1080 last_return=-106.6 (+1 eps) [worker 0] episodes_seen=1120 last_return=-73.0 (+1 eps) [worker 1] episodes_seen=1090 last_return=-99.9 (+1 eps) [worker 0] episodes_seen=1130 last_return=-84.9 (+1 eps) [worker 1] episodes_seen=1100 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1140 last_return=-85.0 (+1 eps) [worker 1] episodes_seen=1110 last_return=-82.5 (+1 eps) [worker 0] episodes_seen=1150 last_return=-96.8 (+1 eps) [worker 1] episodes_seen=1120 last_return=-40.0 (+1 eps) [worker 0] episodes_seen=1160 last_return=-96.5 (+1 eps) [worker 1] episodes_seen=1130 last_return=-102.6 (+1 eps) [worker 0] episodes_seen=1170 last_return=-39.6 (+1 eps) [A2C][sync] it=15001 steps= 300020 (+ 20) avg10= -93.63 loss=4.256 pg=-0.298 vf=8.308 H=0.812 gn=231.450 [worker 1] episodes_seen=1140 last_return=-103.7 (+1 eps) [worker 0] episodes_seen=1180 
last_return=-89.7 (+1 eps) [worker 0] episodes_seen=1190 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1150 last_return=-1.3 (+1 eps) [worker 0] episodes_seen=1200 last_return=99.8 (+1 eps) [worker 1] episodes_seen=1160 last_return=-95.2 (+1 eps) [worker 1] episodes_seen=1170 last_return=-85.3 (+1 eps) [worker 0] episodes_seen=1210 last_return=-88.3 (+1 eps) [worker 1] episodes_seen=1180 last_return=99.8 (+1 eps) [worker 0] episodes_seen=1220 last_return=-91.7 (+1 eps) [worker 1] episodes_seen=1190 last_return=-76.5 (+1 eps) [worker 0] episodes_seen=1230 last_return=-60.2 (+1 eps) [worker 1] episodes_seen=1200 last_return=-89.6 (+1 eps) [worker 0] episodes_seen=1240 last_return=-89.5 (+1 eps) [worker 0] episodes_seen=1250 last_return=-21.1 (+1 eps) [worker 1] episodes_seen=1210 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1260 last_return=-92.1 (+1 eps) [worker 1] episodes_seen=1220 last_return=-82.3 (+1 eps) [worker 1] episodes_seen=1230 last_return=-110.4 (+1 eps) [worker 0] episodes_seen=1270 last_return=-88.7 (+1 eps) [A2C][sync] it=17501 steps= 350020 (+ 20) avg10= -65.79 loss=1.829 pg=-0.145 vf=3.617 H=0.777 gn=114.070 [worker 0] episodes_seen=1280 last_return=-78.2 (+1 eps) [worker 1] episodes_seen=1240 last_return=99.8 (+1 eps) [worker 0] episodes_seen=1290 last_return=-99.2 (+1 eps) [worker 1] episodes_seen=1250 last_return=-57.6 (+1 eps) [worker 0] episodes_seen=1300 last_return=99.7 (+1 eps) [worker 1] episodes_seen=1260 last_return=-82.0 (+1 eps) [worker 0] episodes_seen=1310 last_return=-104.0 (+1 eps) [worker 1] episodes_seen=1270 last_return=99.9 (+1 eps) [worker 0] episodes_seen=1320 last_return=-91.3 (+1 eps) [worker 1] episodes_seen=1280 last_return=-1.4 (+1 eps) [worker 0] episodes_seen=1330 last_return=-105.2 (+1 eps) [A2C][sync] it=20001 steps= 400020 (+ 20) avg10= -83.32 loss=1.124 pg=-0.199 vf=2.429 H=0.656 gn=86.781 [worker 1] episodes_seen=1290 last_return=-100.8 (+1 eps) [worker 0] episodes_seen=1340 last_return=22.5 (+1 
eps) [worker 1] episodes_seen=1300 last_return=-77.6 (+1 eps) [worker 0] episodes_seen=1350 last_return=-101.6 (+1 eps) [worker 1] episodes_seen=1310 last_return=-113.4 (+1 eps) [worker 0] episodes_seen=1360 last_return=-0.5 (+1 eps) [worker 1] episodes_seen=1320 last_return=1.4 (+1 eps) [worker 0] episodes_seen=1370 last_return=-97.2 (+1 eps) [worker 1] episodes_seen=1330 last_return=-107.9 (+1 eps) [worker 0] episodes_seen=1380 last_return=-87.7 (+1 eps) [worker 1] episodes_seen=1340 last_return=-100.9 (+1 eps) [worker 0] episodes_seen=1390 last_return=99.9 (+1 eps) [A2C][sync] it=22501 steps= 450020 (+ 20) avg10= -46.22 loss=23.987 pg=-0.284 vf=44.151 H=0.585 gn=445.944 [worker 0] episodes_seen=1400 last_return=-99.2 (+1 eps) [worker 1] episodes_seen=1350 last_return=-129.7 (+1 eps) [worker 0] episodes_seen=1410 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1360 last_return=-87.7 (+1 eps) [worker 0] episodes_seen=1420 last_return=-98.8 (+1 eps) [worker 1] episodes_seen=1370 last_return=-108.6 (+1 eps) [worker 0] episodes_seen=1430 last_return=-48.6 (+1 eps) [worker 1] episodes_seen=1380 last_return=99.9 (+1 eps) [worker 0] episodes_seen=1440 last_return=-62.0 (+1 eps) [worker 0] episodes_seen=1450 last_return=1.1 (+1 eps) [worker 1] episodes_seen=1390 last_return=-100.4 (+1 eps) [worker 1] episodes_seen=1400 last_return=-118.8 (+1 eps) [worker 0] episodes_seen=1460 last_return=-100.0 (+1 eps) [A2C][sync] it=25001 steps= 500020 (+ 20) avg10=-102.45 loss=36.043 pg=0.189 vf=65.219 H=0.844 gn=719.793 [worker 0] episodes_seen=1470 last_return=-41.0 (+1 eps) [worker 1] episodes_seen=1410 last_return=-90.6 (+1 eps) [worker 0] episodes_seen=1480 last_return=99.9 (+1 eps) [worker 1] episodes_seen=1420 last_return=-106.3 (+1 eps) [worker 1] episodes_seen=1430 last_return=-108.8 (+1 eps) [worker 0] episodes_seen=1490 last_return=-0.6 (+1 eps) [worker 1] episodes_seen=1440 last_return=99.8 (+1 eps) [worker 0] episodes_seen=1500 last_return=-104.4 (+1 eps) [worker 1] 
episodes_seen=1450 last_return=-100.0 (+1 eps) [A2C][sync] it=27501 steps= 550020 (+ 20) avg10= -51.10 loss=0.091 pg=0.004 vf=0.176 H=0.460 gn=36.700 [worker 0] episodes_seen=1510 last_return=-99.9 (+1 eps) [worker 1] episodes_seen=1460 last_return=100.0 (+1 eps) [worker 0] episodes_seen=1520 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1470 last_return=-100.6 (+1 eps) [worker 0] episodes_seen=1530 last_return=-1.6 (+1 eps) [worker 1] episodes_seen=1480 last_return=-104.1 (+1 eps) [worker 0] episodes_seen=1540 last_return=-80.0 (+1 eps) [worker 1] episodes_seen=1490 last_return=-80.8 (+1 eps) [worker 0] episodes_seen=1550 last_return=-102.4 (+1 eps) [worker 1] episodes_seen=1500 last_return=0.2 (+1 eps) [A2C][sync] it=30000 steps= 600000 (+ 20) avg10= -22.27 loss=7.105 pg=0.052 vf=12.831 H=0.215 gn=253.822 [Checkpoint] Saved self-describing checkpoint → part2_artifacts/checkpoints/a2c_run11_seed1227.pth [A2C][sync] done: steps=600000 time=841.3s avg10=-22.27
[Run run11_seed1227] checkpoint: part2_artifacts/checkpoints/a2c_run11_seed1227.pth [Run run11_seed1227] training plot (tail 500): part2_artifacts/train_curve_run11_seed1227.png [Run run11_seed1227] training plot (full): part2_artifacts/train_curve_full_run11_seed1227.png [Run run11_seed1227] per-worker plot: part2_artifacts/train_curve_workers_run11_seed1227.png [Run run11_seed1227] workers-average plot: part2_artifacts/train_curve_workers_avg_run11_seed1227.png
[Eval run11_seed1227] mean=24.68 std=81.33 min=-81.33 max=207.55 [Eval run11_seed1227] CSV: part2_artifacts/eval10_run11_seed1227.csv [Eval run11_seed1227] plot: part2_artifacts/eval10_run11_seed1227.png [Best] ep=3 return=207.55 seed=1230
/usr/local/lib/python3.12/dist-packages/gymnasium/wrappers/rendering.py:293: UserWarning: WARN: Overwriting existing videos at /content/part2_artifacts/videos/run11_seed1227 folder (try specifying a different `video_folder` for the `RecordVideo` wrapper if this is not desired)
logger.warn(
[Video run11_seed1227] episode return=207.55 [Video run11_seed1227] saved under: part2_artifacts/videos run11_seed1227 | mean=24.7±81.3 | best_ep=3, best_ret=207.5
Run #12 — 3 workers, 800k env steps, lr=2.5e-4
# Run 12: train with three workers for 800k environment steps, then evaluate
# the saved checkpoint and record a video from it.
run_id = f"run12_seed{SEED}"

# All tunables for this run live in one mapping and are forwarded to
# train_once as keyword arguments, so the configuration reads as a table.
run12_kwargs = dict(
    n_workers=3,
    total_env_steps=800_000,
    T=10,
    gamma=0.99,
    entropy_coef=0.02,
    value_coef=0.55,
    max_grad_norm=0.5,
    lr=2.5e-4,
    log_every=50_000,
)
model, logs, paths = train_once(run_id=run_id, **run12_kwargs)

# Evaluate the checkpoint written by training; the second return value of
# evaluate_10 is not needed here.
metrics, _ = evaluate_10(run_id, paths.ckpt_path)

# NOTE(review): presumably re-plays the best evaluation episode for the
# video — confirm against record_best_from_eval's definition.
video_dir, best_idx, best_ret = record_best_from_eval(run_id, paths.ckpt_path)

# One-line summary: evaluation mean ± std plus the best episode index/return.
print(f"{run_id} | mean={metrics['mean']:.1f}±{metrics['std']:.1f} | best_ep={best_idx}, best_ret={best_ret:.1f}")
[Run run12_seed1227] starting training… [A2C][sync] start: workers=3, T=10, target_steps=800000, mp=fork [A2C][sync] it= 1 steps= 30 (+ 30) avg10= nan loss=4.043 pg=-0.000 vf=7.402 H=1.386 gn=6.447 [worker 0] episodes_seen=10 last_return=-119.4 (+1 eps) [worker 1] episodes_seen=10 last_return=-138.5 (+1 eps) [worker 2] episodes_seen=10 last_return=-107.9 (+1 eps) [worker 0] episodes_seen=20 last_return=-189.8 (+1 eps) [worker 2] episodes_seen=20 last_return=-188.1 (+1 eps) [worker 1] episodes_seen=20 last_return=-171.4 (+1 eps) [worker 0] episodes_seen=30 last_return=-124.4 (+1 eps) [worker 2] episodes_seen=30 last_return=-151.7 (+1 eps) [worker 1] episodes_seen=30 last_return=-132.5 (+1 eps) [worker 0] episodes_seen=40 last_return=-191.8 (+1 eps) [worker 2] episodes_seen=40 last_return=-136.0 (+1 eps) [worker 1] episodes_seen=40 last_return=-118.3 (+1 eps) [worker 0] episodes_seen=50 last_return=-166.6 (+1 eps) [worker 2] episodes_seen=50 last_return=-198.9 (+1 eps) [worker 1] episodes_seen=50 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=60 last_return=-119.6 (+1 eps) [worker 2] episodes_seen=60 last_return=-132.0 (+1 eps) [worker 1] episodes_seen=60 last_return=-170.6 (+1 eps) [worker 0] episodes_seen=70 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=70 last_return=-114.7 (+1 eps) [worker 2] episodes_seen=70 last_return=-181.4 (+1 eps) [worker 0] episodes_seen=80 last_return=-136.4 (+1 eps) [worker 1] episodes_seen=80 last_return=-156.1 (+1 eps) [worker 0] episodes_seen=90 last_return=-111.5 (+1 eps) [worker 2] episodes_seen=80 last_return=-126.0 (+1 eps) [worker 1] episodes_seen=90 last_return=-118.0 (+1 eps) [worker 2] episodes_seen=90 last_return=-108.9 (+1 eps) [worker 0] episodes_seen=100 last_return=-140.3 (+1 eps) [worker 2] episodes_seen=100 last_return=-175.9 (+1 eps) [worker 1] episodes_seen=100 last_return=-25.0 (+1 eps) [worker 0] episodes_seen=110 last_return=-195.7 (+1 eps) [worker 1] episodes_seen=110 last_return=-133.1 (+1 eps) 
[worker 2] episodes_seen=110 last_return=-236.2 (+1 eps) [worker 0] episodes_seen=120 last_return=-173.1 (+1 eps) [worker 1] episodes_seen=120 last_return=-125.5 (+1 eps) [worker 0] episodes_seen=130 last_return=-182.7 (+1 eps) [worker 2] episodes_seen=120 last_return=-54.5 (+1 eps) [worker 2] episodes_seen=130 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=130 last_return=-192.3 (+1 eps) [worker 0] episodes_seen=140 last_return=-112.4 (+1 eps) [worker 2] episodes_seen=140 last_return=-141.5 (+1 eps) [worker 1] episodes_seen=140 last_return=-156.9 (+1 eps) [worker 0] episodes_seen=150 last_return=-146.0 (+1 eps) [worker 2] episodes_seen=150 last_return=-161.4 (+1 eps) [worker 1] episodes_seen=150 last_return=-176.8 (+1 eps) [worker 0] episodes_seen=160 last_return=-77.9 (+1 eps) [worker 2] episodes_seen=160 last_return=-140.1 (+1 eps) [A2C][sync] it= 1668 steps= 50040 (+ 30) avg10=-170.56 loss=249.751 pg=-0.000 vf=454.093 H=0.002 gn=1539.577 [worker 1] episodes_seen=160 last_return=-102.9 (+1 eps) [worker 0] episodes_seen=170 last_return=-221.0 (+1 eps) [worker 2] episodes_seen=170 last_return=-198.2 (+1 eps) [worker 1] episodes_seen=170 last_return=-154.9 (+1 eps) [worker 0] episodes_seen=180 last_return=4.4 (+1 eps) [worker 2] episodes_seen=180 last_return=-137.4 (+1 eps) [worker 1] episodes_seen=180 last_return=-112.2 (+1 eps) [worker 0] episodes_seen=190 last_return=-156.4 (+1 eps) [worker 2] episodes_seen=190 last_return=-150.5 (+1 eps) [worker 1] episodes_seen=190 last_return=-145.8 (+1 eps) [worker 0] episodes_seen=200 last_return=-190.9 (+1 eps) [worker 2] episodes_seen=200 last_return=-106.1 (+1 eps) [worker 1] episodes_seen=200 last_return=-165.1 (+1 eps) [worker 0] episodes_seen=210 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=210 last_return=-109.6 (+1 eps) [worker 1] episodes_seen=210 last_return=-125.4 (+1 eps) [worker 0] episodes_seen=220 last_return=-181.5 (+1 eps) [worker 2] episodes_seen=220 last_return=-117.2 (+1 eps) [worker 1] 
episodes_seen=220 last_return=-135.5 (+1 eps) [worker 0] episodes_seen=230 last_return=-11.4 (+1 eps) [worker 2] episodes_seen=230 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=230 last_return=-162.7 (+1 eps) [worker 0] episodes_seen=240 last_return=-136.8 (+1 eps) [worker 2] episodes_seen=240 last_return=-196.1 (+1 eps) [worker 1] episodes_seen=240 last_return=-206.1 (+1 eps) [worker 0] episodes_seen=250 last_return=-158.6 (+1 eps) [worker 2] episodes_seen=250 last_return=-214.6 (+1 eps) [worker 1] episodes_seen=250 last_return=-184.5 (+1 eps) [worker 0] episodes_seen=260 last_return=-106.4 (+1 eps) [worker 2] episodes_seen=260 last_return=-195.3 (+1 eps) [worker 1] episodes_seen=260 last_return=-214.6 (+1 eps) [worker 0] episodes_seen=270 last_return=-118.9 (+1 eps) [worker 2] episodes_seen=270 last_return=-192.5 (+1 eps) [worker 1] episodes_seen=270 last_return=-201.1 (+1 eps) [worker 0] episodes_seen=280 last_return=-127.2 (+1 eps) [worker 2] episodes_seen=280 last_return=-215.1 (+1 eps) [worker 1] episodes_seen=280 last_return=-174.2 (+1 eps) [worker 0] episodes_seen=290 last_return=-204.3 (+1 eps) [worker 2] episodes_seen=290 last_return=-42.9 (+1 eps) [worker 1] episodes_seen=290 last_return=-183.5 (+1 eps) [worker 0] episodes_seen=300 last_return=-152.8 (+1 eps) [worker 2] episodes_seen=300 last_return=-173.5 (+1 eps) [worker 1] episodes_seen=300 last_return=-177.7 (+1 eps) [worker 0] episodes_seen=310 last_return=-225.5 (+1 eps) [worker 2] episodes_seen=310 last_return=-186.6 (+1 eps) [worker 1] episodes_seen=310 last_return=-166.7 (+1 eps) [worker 0] episodes_seen=320 last_return=-189.3 (+1 eps) [worker 2] episodes_seen=320 last_return=-151.5 (+1 eps) [worker 1] episodes_seen=320 last_return=-113.3 (+1 eps) [worker 0] episodes_seen=330 last_return=-159.2 (+1 eps) [worker 2] episodes_seen=330 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=330 last_return=-272.1 (+1 eps) [worker 0] episodes_seen=340 last_return=31.3 (+1 eps) [worker 2] 
episodes_seen=340 last_return=-179.8 (+1 eps) [worker 1] episodes_seen=340 last_return=-116.3 (+1 eps) [worker 0] episodes_seen=350 last_return=-148.7 (+1 eps) [worker 2] episodes_seen=350 last_return=-117.4 (+1 eps) [worker 1] episodes_seen=350 last_return=-13.4 (+1 eps) [worker 0] episodes_seen=360 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=360 last_return=-177.4 (+1 eps) [worker 1] episodes_seen=360 last_return=-147.4 (+1 eps) [worker 0] episodes_seen=370 last_return=-115.0 (+1 eps) [worker 2] episodes_seen=370 last_return=-195.5 (+1 eps) [worker 1] episodes_seen=370 last_return=-66.3 (+1 eps) [worker 2] episodes_seen=380 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=380 last_return=-129.4 (+1 eps) [worker 1] episodes_seen=380 last_return=-152.5 (+1 eps) [worker 0] episodes_seen=390 last_return=-158.0 (+1 eps) [worker 2] episodes_seen=390 last_return=-192.8 (+1 eps) [worker 1] episodes_seen=390 last_return=-217.5 (+1 eps) [worker 0] episodes_seen=400 last_return=-204.6 (+1 eps) [worker 2] episodes_seen=400 last_return=-133.2 (+1 eps) [worker 1] episodes_seen=400 last_return=-112.4 (+1 eps) [worker 0] episodes_seen=410 last_return=-93.7 (+1 eps) [A2C][sync] it= 3335 steps= 100050 (+ 30) avg10=-144.08 loss=11507.660 pg=0.000 vf=20923.018 H=0.000 gn=32469.746 [worker 2] episodes_seen=410 last_return=-115.2 (+1 eps) [worker 1] episodes_seen=410 last_return=-144.9 (+1 eps) [worker 0] episodes_seen=420 last_return=-167.8 (+1 eps) [worker 2] episodes_seen=420 last_return=-199.4 (+1 eps) [worker 1] episodes_seen=420 last_return=-123.8 (+1 eps) [worker 0] episodes_seen=430 last_return=-189.1 (+1 eps) [worker 2] episodes_seen=430 last_return=-123.4 (+1 eps) [worker 1] episodes_seen=430 last_return=-210.3 (+1 eps) [worker 0] episodes_seen=440 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=440 last_return=-214.7 (+1 eps) [worker 1] episodes_seen=440 last_return=-169.9 (+1 eps) [worker 0] episodes_seen=450 last_return=-144.3 (+1 eps) [worker 2] 
episodes_seen=450 last_return=-61.5 (+1 eps) [worker 1] episodes_seen=450 last_return=-171.8 (+1 eps) [worker 2] episodes_seen=460 last_return=-114.6 (+1 eps) [worker 0] episodes_seen=460 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=460 last_return=-150.7 (+1 eps) [worker 2] episodes_seen=470 last_return=-154.6 (+1 eps) [worker 0] episodes_seen=470 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=470 last_return=-87.0 (+1 eps) [worker 2] episodes_seen=480 last_return=-202.7 (+1 eps) [worker 0] episodes_seen=480 last_return=-156.5 (+1 eps) [worker 1] episodes_seen=480 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=490 last_return=-150.3 (+1 eps) [worker 0] episodes_seen=490 last_return=-116.3 (+1 eps) [worker 1] episodes_seen=490 last_return=-116.8 (+1 eps) [worker 1] episodes_seen=500 last_return=-145.3 (+1 eps) [worker 2] episodes_seen=500 last_return=-172.4 (+1 eps) [worker 0] episodes_seen=500 last_return=-179.7 (+1 eps) [worker 1] episodes_seen=510 last_return=-122.8 (+1 eps) [worker 0] episodes_seen=510 last_return=-113.0 (+1 eps) [worker 2] episodes_seen=510 last_return=-171.0 (+1 eps) [worker 1] episodes_seen=520 last_return=-132.0 (+1 eps) [worker 2] episodes_seen=520 last_return=-146.3 (+1 eps) [worker 0] episodes_seen=520 last_return=-140.3 (+1 eps) [worker 2] episodes_seen=530 last_return=-195.7 (+1 eps) [worker 1] episodes_seen=530 last_return=-83.0 (+1 eps) [worker 0] episodes_seen=530 last_return=-47.9 (+1 eps) [worker 2] episodes_seen=540 last_return=-42.5 (+1 eps) [worker 1] episodes_seen=540 last_return=-166.7 (+1 eps) [worker 0] episodes_seen=540 last_return=-230.9 (+1 eps) [worker 2] episodes_seen=550 last_return=-166.5 (+1 eps) [worker 1] episodes_seen=550 last_return=-194.7 (+1 eps) [worker 0] episodes_seen=550 last_return=-113.3 (+1 eps) [worker 2] episodes_seen=560 last_return=-180.9 (+1 eps) [worker 1] episodes_seen=560 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=560 last_return=-102.0 (+1 eps) [worker 2] 
episodes_seen=570 last_return=-167.4 (+1 eps) [worker 1] episodes_seen=570 last_return=-83.6 (+1 eps) [worker 0] episodes_seen=570 last_return=-114.7 (+1 eps) [worker 2] episodes_seen=580 last_return=-89.5 (+1 eps) [worker 1] episodes_seen=580 last_return=-92.2 (+1 eps) [worker 0] episodes_seen=580 last_return=-42.7 (+1 eps) [worker 2] episodes_seen=590 last_return=-81.3 (+1 eps) [worker 1] episodes_seen=590 last_return=-157.9 (+1 eps) [worker 0] episodes_seen=590 last_return=-36.9 (+1 eps) [worker 2] episodes_seen=600 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=600 last_return=-88.9 (+1 eps) [worker 0] episodes_seen=600 last_return=-90.7 (+1 eps) [worker 2] episodes_seen=610 last_return=-93.4 (+1 eps) [worker 1] episodes_seen=610 last_return=-82.2 (+1 eps) [worker 0] episodes_seen=610 last_return=-144.2 (+1 eps) [worker 2] episodes_seen=620 last_return=-87.9 (+1 eps) [worker 1] episodes_seen=620 last_return=-123.2 (+1 eps) [worker 0] episodes_seen=620 last_return=-149.5 (+1 eps) [worker 2] episodes_seen=630 last_return=-89.0 (+1 eps) [worker 1] episodes_seen=630 last_return=-102.8 (+1 eps) [worker 0] episodes_seen=630 last_return=-138.2 (+1 eps) [A2C][sync] it= 5002 steps= 150060 (+ 30) avg10= -88.82 loss=42.563 pg=-0.034 vf=77.495 H=1.270 gn=249.419 [worker 2] episodes_seen=640 last_return=-95.5 (+1 eps) [worker 1] episodes_seen=640 last_return=-77.4 (+1 eps) [worker 0] episodes_seen=640 last_return=-93.6 (+1 eps) [worker 2] episodes_seen=650 last_return=-128.7 (+1 eps) [worker 1] episodes_seen=650 last_return=-99.1 (+1 eps) [worker 0] episodes_seen=650 last_return=-128.0 (+1 eps) [worker 2] episodes_seen=660 last_return=-70.8 (+1 eps) [worker 1] episodes_seen=660 last_return=-171.3 (+1 eps) [worker 0] episodes_seen=660 last_return=-79.4 (+1 eps) [worker 2] episodes_seen=670 last_return=-77.2 (+1 eps) [worker 1] episodes_seen=670 last_return=-76.7 (+1 eps) [worker 0] episodes_seen=670 last_return=-70.4 (+1 eps) [worker 2] episodes_seen=680 
last_return=-153.7 (+1 eps) [worker 1] episodes_seen=680 last_return=-81.1 (+1 eps) [worker 0] episodes_seen=680 last_return=-99.8 (+1 eps) [worker 2] episodes_seen=690 last_return=-56.4 (+1 eps) [worker 1] episodes_seen=690 last_return=-154.4 (+1 eps) [worker 0] episodes_seen=690 last_return=-127.1 (+1 eps) [worker 2] episodes_seen=700 last_return=-111.8 (+1 eps) [worker 1] episodes_seen=700 last_return=-114.0 (+1 eps) [worker 0] episodes_seen=700 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=710 last_return=-83.0 (+1 eps) [worker 1] episodes_seen=710 last_return=-89.3 (+1 eps) [worker 0] episodes_seen=710 last_return=-112.6 (+1 eps) [worker 2] episodes_seen=720 last_return=-97.2 (+1 eps) [worker 1] episodes_seen=720 last_return=-97.8 (+1 eps) [worker 0] episodes_seen=720 last_return=-96.1 (+1 eps) [worker 2] episodes_seen=730 last_return=-105.8 (+1 eps) [worker 1] episodes_seen=730 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=730 last_return=-56.8 (+1 eps) [worker 2] episodes_seen=740 last_return=-69.0 (+1 eps) [worker 1] episodes_seen=740 last_return=-127.3 (+1 eps) [worker 0] episodes_seen=740 last_return=-103.8 (+1 eps) [worker 2] episodes_seen=750 last_return=-74.6 (+1 eps) [worker 1] episodes_seen=750 last_return=-129.2 (+1 eps) [worker 0] episodes_seen=750 last_return=-171.5 (+1 eps) [worker 2] episodes_seen=760 last_return=-96.9 (+1 eps) [worker 1] episodes_seen=760 last_return=-115.5 (+1 eps) [worker 0] episodes_seen=760 last_return=-105.5 (+1 eps) [worker 1] episodes_seen=770 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=770 last_return=-109.0 (+1 eps) [worker 0] episodes_seen=770 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=780 last_return=-114.5 (+1 eps) [worker 2] episodes_seen=780 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=780 last_return=-111.2 (+1 eps) [worker 1] episodes_seen=790 last_return=-78.9 (+1 eps) [worker 2] episodes_seen=790 last_return=-69.5 (+1 eps) [A2C][sync] it= 6669 steps= 200070 (+ 30) 
avg10= -93.35 loss=137.321 pg=-0.102 vf=249.898 H=1.092 gn=535.437 [worker 0] episodes_seen=790 last_return=-105.0 (+1 eps) [worker 1] episodes_seen=800 last_return=-107.9 (+1 eps) [worker 2] episodes_seen=800 last_return=-98.5 (+1 eps) [worker 0] episodes_seen=800 last_return=-105.9 (+1 eps) [worker 1] episodes_seen=810 last_return=-278.6 (+1 eps) [worker 2] episodes_seen=810 last_return=-202.9 (+1 eps) [worker 0] episodes_seen=810 last_return=-113.4 (+1 eps) [worker 1] episodes_seen=820 last_return=-135.6 (+1 eps) [worker 2] episodes_seen=820 last_return=-150.4 (+1 eps) [worker 0] episodes_seen=820 last_return=-206.2 (+1 eps) [worker 1] episodes_seen=830 last_return=-151.3 (+1 eps) [worker 2] episodes_seen=830 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=840 last_return=-88.5 (+1 eps) [worker 0] episodes_seen=830 last_return=-89.6 (+1 eps) [worker 2] episodes_seen=840 last_return=-90.5 (+1 eps) [worker 0] episodes_seen=840 last_return=-66.3 (+1 eps) [worker 2] episodes_seen=850 last_return=-94.2 (+1 eps) [worker 1] episodes_seen=850 last_return=-99.7 (+1 eps) [worker 0] episodes_seen=850 last_return=-96.0 (+1 eps) [worker 2] episodes_seen=860 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=860 last_return=-64.5 (+1 eps) [worker 0] episodes_seen=860 last_return=-103.0 (+1 eps) [worker 2] episodes_seen=870 last_return=-113.5 (+1 eps) [worker 0] episodes_seen=870 last_return=-111.3 (+1 eps) [worker 1] episodes_seen=870 last_return=-36.7 (+1 eps) [worker 2] episodes_seen=880 last_return=-50.4 (+1 eps) [worker 0] episodes_seen=880 last_return=-32.1 (+1 eps) [worker 1] episodes_seen=880 last_return=-92.5 (+1 eps) [A2C][sync] it= 8336 steps= 250080 (+ 30) avg10= -95.60 loss=26.863 pg=-0.148 vf=49.147 H=0.983 gn=407.319 [worker 0] episodes_seen=890 last_return=-90.9 (+1 eps) [worker 2] episodes_seen=890 last_return=-102.4 (+1 eps) [worker 1] episodes_seen=890 last_return=-95.6 (+1 eps) [worker 2] episodes_seen=900 last_return=-48.0 (+1 eps) [worker 1] 
episodes_seen=900 last_return=-105.9 (+1 eps) [worker 0] episodes_seen=900 last_return=0.8 (+1 eps) [worker 2] episodes_seen=910 last_return=-79.5 (+1 eps) [worker 1] episodes_seen=910 last_return=-61.8 (+1 eps) [worker 0] episodes_seen=910 last_return=-73.6 (+1 eps) [worker 2] episodes_seen=920 last_return=1.3 (+1 eps) [worker 0] episodes_seen=920 last_return=-76.4 (+1 eps) [worker 1] episodes_seen=920 last_return=-22.7 (+1 eps) [worker 2] episodes_seen=930 last_return=-90.3 (+1 eps) [worker 0] episodes_seen=930 last_return=-84.8 (+1 eps) [worker 1] episodes_seen=930 last_return=-50.0 (+1 eps) [worker 0] episodes_seen=940 last_return=-60.2 (+1 eps) [worker 2] episodes_seen=940 last_return=-70.6 (+1 eps) [A2C][sync] it=10003 steps= 300090 (+ 30) avg10= -63.42 loss=51.533 pg=0.269 vf=93.238 H=0.855 gn=706.943 [worker 0] episodes_seen=950 last_return=-109.8 (+1 eps) [worker 1] episodes_seen=940 last_return=-74.5 (+1 eps) [worker 2] episodes_seen=950 last_return=-66.6 (+1 eps) [worker 1] episodes_seen=950 last_return=-78.6 (+1 eps) [worker 0] episodes_seen=960 last_return=-100.6 (+1 eps) [worker 2] episodes_seen=960 last_return=-145.8 (+1 eps) [worker 1] episodes_seen=960 last_return=-68.1 (+1 eps) [worker 2] episodes_seen=970 last_return=-97.6 (+1 eps) [worker 0] episodes_seen=970 last_return=-81.6 (+1 eps) [worker 1] episodes_seen=970 last_return=-103.6 (+1 eps) [A2C][sync] it=11670 steps= 350100 (+ 30) avg10= -74.66 loss=5.350 pg=-0.130 vf=9.999 H=0.999 gn=135.827 [worker 2] episodes_seen=980 last_return=-99.3 (+1 eps) [worker 0] episodes_seen=980 last_return=-95.6 (+1 eps) [worker 1] episodes_seen=980 last_return=-79.7 (+1 eps) [worker 0] episodes_seen=990 last_return=-9.0 (+1 eps) [worker 2] episodes_seen=990 last_return=0.8 (+1 eps) [worker 1] episodes_seen=990 last_return=-75.5 (+1 eps) [worker 1] episodes_seen=1000 last_return=-3.0 (+1 eps) [worker 0] episodes_seen=1000 last_return=-15.4 (+1 eps) [A2C][sync] it=13337 steps= 400110 (+ 30) avg10= -16.88 
loss=3.580 pg=-0.150 vf=6.819 H=0.994 gn=202.939 [worker 2] episodes_seen=1000 last_return=0.4 (+1 eps) [worker 1] episodes_seen=1010 last_return=-11.0 (+1 eps) [worker 2] episodes_seen=1010 last_return=-105.3 (+1 eps) [worker 0] episodes_seen=1010 last_return=-4.3 (+1 eps) [worker 2] episodes_seen=1020 last_return=-72.3 (+1 eps) [worker 1] episodes_seen=1020 last_return=-95.5 (+1 eps) [worker 0] episodes_seen=1020 last_return=-89.7 (+1 eps) [A2C][sync] it=15004 steps= 450120 (+ 30) avg10= -61.88 loss=26.730 pg=-0.130 vf=48.874 H=1.068 gn=275.154 [worker 2] episodes_seen=1030 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1030 last_return=2.7 (+1 eps) [worker 1] episodes_seen=1030 last_return=-0.3 (+1 eps) [worker 2] episodes_seen=1040 last_return=-108.5 (+1 eps) [worker 0] episodes_seen=1040 last_return=-111.8 (+1 eps) [worker 1] episodes_seen=1040 last_return=-95.9 (+1 eps) [worker 2] episodes_seen=1050 last_return=-88.3 (+1 eps) [A2C][sync] it=16671 steps= 500130 (+ 30) avg10= -53.88 loss=0.943 pg=-0.073 vf=1.894 H=1.244 gn=73.966 [worker 0] episodes_seen=1050 last_return=-109.3 (+1 eps) [worker 1] episodes_seen=1050 last_return=-79.3 (+1 eps) [worker 2] episodes_seen=1060 last_return=-22.6 (+1 eps) [worker 0] episodes_seen=1060 last_return=-90.0 (+1 eps) [worker 2] episodes_seen=1070 last_return=-0.1 (+1 eps) [worker 1] episodes_seen=1060 last_return=-0.6 (+1 eps) [worker 0] episodes_seen=1070 last_return=-103.3 (+1 eps) [worker 1] episodes_seen=1070 last_return=100.0 (+1 eps) [worker 2] episodes_seen=1080 last_return=-87.0 (+1 eps) [worker 0] episodes_seen=1080 last_return=-72.4 (+1 eps) [A2C][sync] it=18338 steps= 550140 (+ 30) avg10= 13.14 loss=2.457 pg=0.301 vf=3.949 H=0.795 gn=87.206 [worker 1] episodes_seen=1080 last_return=100.0 (+1 eps) [worker 2] episodes_seen=1090 last_return=100.0 (+1 eps) [worker 0] episodes_seen=1090 last_return=-91.5 (+1 eps) [worker 2] episodes_seen=1100 last_return=-99.1 (+1 eps) [worker 1] episodes_seen=1090 
last_return=100.0 (+1 eps) [worker 0] episodes_seen=1100 last_return=100.0 (+1 eps) [worker 2] episodes_seen=1110 last_return=100.0 (+1 eps) [worker 0] episodes_seen=1110 last_return=-102.0 (+1 eps) [worker 1] episodes_seen=1100 last_return=100.0 (+1 eps) [A2C][sync] it=20005 steps= 600150 (+ 30) avg10= 35.83 loss=22.847 pg=0.196 vf=41.209 H=0.687 gn=418.665 [worker 0] episodes_seen=1120 last_return=100.0 (+1 eps) [worker 1] episodes_seen=1110 last_return=100.0 (+1 eps) [worker 2] episodes_seen=1120 last_return=100.0 (+1 eps) [worker 2] episodes_seen=1130 last_return=100.0 (+1 eps) [worker 1] episodes_seen=1120 last_return=-103.9 (+1 eps) [worker 0] episodes_seen=1130 last_return=100.0 (+1 eps) [worker 2] episodes_seen=1140 last_return=-118.6 (+1 eps) [worker 1] episodes_seen=1130 last_return=100.0 (+1 eps) [worker 0] episodes_seen=1140 last_return=100.0 (+1 eps) [worker 0] episodes_seen=1150 last_return=100.0 (+1 eps) [worker 1] episodes_seen=1140 last_return=100.0 (+1 eps) [worker 2] episodes_seen=1150 last_return=100.0 (+1 eps) [A2C][sync] it=21672 steps= 650160 (+ 30) avg10= 79.86 loss=1.703 pg=-0.458 vf=3.952 H=0.611 gn=142.365 [worker 0] episodes_seen=1160 last_return=100.0 (+1 eps) [worker 1] episodes_seen=1150 last_return=100.0 (+1 eps) [worker 2] episodes_seen=1160 last_return=100.0 (+1 eps) [worker 0] episodes_seen=1170 last_return=-124.8 (+1 eps) [worker 1] episodes_seen=1160 last_return=100.0 (+1 eps) [worker 2] episodes_seen=1170 last_return=-118.8 (+1 eps) [worker 1] episodes_seen=1170 last_return=-89.5 (+1 eps) [worker 0] episodes_seen=1180 last_return=100.0 (+1 eps) [worker 2] episodes_seen=1180 last_return=100.0 (+1 eps) [worker 1] episodes_seen=1180 last_return=100.0 (+1 eps) [worker 0] episodes_seen=1190 last_return=100.0 (+1 eps) [worker 2] episodes_seen=1190 last_return=100.0 (+1 eps) [worker 0] episodes_seen=1200 last_return=-105.7 (+1 eps) [worker 1] episodes_seen=1190 last_return=100.0 (+1 eps) [worker 2] episodes_seen=1200 last_return=100.0 
(+1 eps) [A2C][sync] it=23339 steps= 700170 (+ 30) avg10= 56.94 loss=2.736 pg=-0.234 vf=5.419 H=0.525 gn=164.571 [worker 0] episodes_seen=1210 last_return=100.0 (+1 eps) [worker 2] episodes_seen=1210 last_return=100.0 (+1 eps) [worker 1] episodes_seen=1200 last_return=100.0 (+1 eps) [worker 0] episodes_seen=1220 last_return=100.0 (+1 eps) [worker 2] episodes_seen=1220 last_return=-95.0 (+1 eps) [worker 1] episodes_seen=1210 last_return=-119.8 (+1 eps) [worker 0] episodes_seen=1230 last_return=100.0 (+1 eps) [worker 2] episodes_seen=1230 last_return=-101.6 (+1 eps) [worker 1] episodes_seen=1220 last_return=100.0 (+1 eps) [worker 2] episodes_seen=1240 last_return=100.0 (+1 eps) [worker 1] episodes_seen=1230 last_return=-78.2 (+1 eps) [worker 0] episodes_seen=1240 last_return=100.0 (+1 eps) [worker 2] episodes_seen=1250 last_return=-116.2 (+1 eps) [worker 1] episodes_seen=1240 last_return=100.0 (+1 eps) [worker 0] episodes_seen=1250 last_return=100.0 (+1 eps) [worker 2] episodes_seen=1260 last_return=-101.4 (+1 eps) [worker 1] episodes_seen=1250 last_return=-103.1 (+1 eps) [worker 0] episodes_seen=1260 last_return=100.0 (+1 eps) [A2C][sync] it=25006 steps= 750180 (+ 30) avg10= 40.63 loss=249.321 pg=-0.110 vf=453.527 H=0.429 gn=1275.424 [worker 2] episodes_seen=1270 last_return=100.0 (+1 eps) [worker 1] episodes_seen=1260 last_return=100.0 (+1 eps) [worker 0] episodes_seen=1270 last_return=-128.3 (+1 eps) [worker 2] episodes_seen=1280 last_return=100.0 (+1 eps) [worker 1] episodes_seen=1270 last_return=100.0 (+1 eps) [worker 0] episodes_seen=1280 last_return=100.0 (+1 eps) [worker 2] episodes_seen=1290 last_return=100.0 (+1 eps) [worker 1] episodes_seen=1280 last_return=100.0 (+1 eps) [worker 0] episodes_seen=1290 last_return=-82.7 (+1 eps) [worker 2] episodes_seen=1300 last_return=100.0 (+1 eps) [worker 1] episodes_seen=1290 last_return=-95.6 (+1 eps) [worker 0] episodes_seen=1300 last_return=-121.6 (+1 eps) [worker 0] episodes_seen=1310 last_return=-99.1 (+1 eps) 
[worker 2] episodes_seen=1310 last_return=100.0 (+1 eps) [worker 1] episodes_seen=1300 last_return=100.0 (+1 eps) [worker 0] episodes_seen=1320 last_return=100.0 (+1 eps) [worker 1] episodes_seen=1310 last_return=-92.1 (+1 eps) [worker 2] episodes_seen=1320 last_return=100.0 (+1 eps) [worker 0] episodes_seen=1330 last_return=100.0 (+1 eps) [worker 1] episodes_seen=1320 last_return=100.0 (+1 eps) [worker 2] episodes_seen=1330 last_return=100.0 (+1 eps) [A2C][sync] it=26667 steps= 800010 (+ 30) avg10= -0.96 loss=3.431 pg=0.310 vf=5.699 H=0.653 gn=138.510 [Checkpoint] Saved self-describing checkpoint → part2_artifacts/checkpoints/a2c_run12_seed1227.pth [A2C][sync] done: steps=800010 time=1061.1s avg10=-0.96
[Run run12_seed1227] checkpoint: part2_artifacts/checkpoints/a2c_run12_seed1227.pth [Run run12_seed1227] training plot (tail 500): part2_artifacts/train_curve_run12_seed1227.png [Run run12_seed1227] training plot (full): part2_artifacts/train_curve_full_run12_seed1227.png [Run run12_seed1227] per-worker plot: part2_artifacts/train_curve_workers_run12_seed1227.png [Run run12_seed1227] workers-average plot: part2_artifacts/train_curve_workers_avg_run12_seed1227.png
[Eval run12_seed1227] mean=206.74 std=101.76 min=-1.67 max=291.73 [Eval run12_seed1227] CSV: part2_artifacts/eval10_run12_seed1227.csv [Eval run12_seed1227] plot: part2_artifacts/eval10_run12_seed1227.png [Best] ep=7 return=291.73 seed=1234
/usr/local/lib/python3.12/dist-packages/gymnasium/wrappers/rendering.py:293: UserWarning: WARN: Overwriting existing videos at /content/part2_artifacts/videos/run12_seed1227 folder (try specifying a different `video_folder` for the `RecordVideo` wrapper if this is not desired)
logger.warn(
[Video run12_seed1227] episode return=291.73 [Video run12_seed1227] saved under: part2_artifacts/videos run12_seed1227 | mean=206.7±101.8 | best_ep=7, best_ret=291.7
Run#13
# Run 13: train, evaluate the saved checkpoint, then record the best eval episode.
run_id = f"run13_seed{SEED}"

# Training settings for this run, gathered in one mapping so they can be
# compared against other runs at a glance.
# NOTE(review): in the output recorded with this cell the policy entropy (H)
# reads 0.000 by it=1501 and the value loss / grad norm grow by orders of
# magnitude while avg10 stays around -130; treat these settings as an
# unstable configuration until re-verified.
run13_train_kwargs = {
    "n_workers": 4,
    "total_env_steps": 900_000,
    "T": 10,
    "gamma": 0.99,
    "entropy_coef": 0.015,
    "value_coef": 0.60,
    "max_grad_norm": 0.5,
    "lr": 2.5e-4,
    "log_every": 60_000,
}
model, logs, paths = train_once(run_id=run_id, **run13_train_kwargs)

# Evaluate the checkpoint written by training; only the metrics dict
# (read below for its 'mean' and 'std' entries) is used here.
metrics, _ = evaluate_10(run_id, paths.ckpt_path)

# Record a video from the same checkpoint; returns the video location plus
# the index and return of the best evaluation episode.
video_dir, best_idx, best_ret = record_best_from_eval(run_id, paths.ckpt_path)

# One-line summary of the run.
print(f"{run_id} | mean={metrics['mean']:.1f}±{metrics['std']:.1f} | best_ep={best_idx}, best_ret={best_ret:.1f}")
[Run run13_seed1227] starting training… [A2C][sync] start: workers=4, T=10, target_steps=900000, mp=fork [A2C][sync] it= 1 steps= 40 (+ 40) avg10= nan loss=19.774 pg=-0.000 vf=32.991 H=1.386 gn=4.932 [worker 0] episodes_seen=10 last_return=-172.7 (+1 eps) [worker 1] episodes_seen=10 last_return=-189.3 (+1 eps) [worker 3] episodes_seen=10 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=10 last_return=-156.1 (+1 eps) [worker 0] episodes_seen=20 last_return=-173.4 (+1 eps) [worker 1] episodes_seen=20 last_return=-115.1 (+1 eps) [worker 3] episodes_seen=20 last_return=-104.1 (+1 eps) [worker 2] episodes_seen=20 last_return=-132.5 (+1 eps) [worker 0] episodes_seen=30 last_return=-84.3 (+1 eps) [worker 1] episodes_seen=30 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=30 last_return=-179.1 (+1 eps) [worker 2] episodes_seen=30 last_return=-95.2 (+1 eps) [worker 1] episodes_seen=40 last_return=-163.6 (+1 eps) [worker 2] episodes_seen=40 last_return=-122.0 (+1 eps) [worker 0] episodes_seen=40 last_return=-118.9 (+1 eps) [worker 3] episodes_seen=40 last_return=-171.3 (+1 eps) [worker 2] episodes_seen=50 last_return=-140.8 (+1 eps) [worker 1] episodes_seen=50 last_return=-151.8 (+1 eps) [worker 0] episodes_seen=50 last_return=-136.9 (+1 eps) [worker 3] episodes_seen=50 last_return=-175.9 (+1 eps) [worker 2] episodes_seen=60 last_return=-211.0 (+1 eps) [worker 0] episodes_seen=60 last_return=9.2 (+1 eps) [worker 1] episodes_seen=60 last_return=-174.2 (+1 eps) [worker 3] episodes_seen=60 last_return=-142.6 (+1 eps) [worker 2] episodes_seen=70 last_return=-191.4 (+1 eps) [worker 0] episodes_seen=70 last_return=-188.6 (+1 eps) [worker 1] episodes_seen=70 last_return=-190.0 (+1 eps) [worker 3] episodes_seen=70 last_return=-152.4 (+1 eps) [worker 2] episodes_seen=80 last_return=-116.4 (+1 eps) [worker 0] episodes_seen=80 last_return=-170.0 (+1 eps) [worker 1] episodes_seen=80 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=80 last_return=-192.0 (+1 eps) [worker 0] 
episodes_seen=90 last_return=-161.3 (+1 eps) [worker 2] episodes_seen=90 last_return=-184.1 (+1 eps) [worker 1] episodes_seen=90 last_return=-125.0 (+1 eps) [worker 3] episodes_seen=90 last_return=-122.7 (+1 eps) [worker 0] episodes_seen=100 last_return=-174.5 (+1 eps) [worker 2] episodes_seen=100 last_return=-134.8 (+1 eps) [worker 1] episodes_seen=100 last_return=-113.0 (+1 eps) [worker 0] episodes_seen=110 last_return=-108.6 (+1 eps) [worker 3] episodes_seen=100 last_return=-167.6 (+1 eps) [worker 2] episodes_seen=110 last_return=-173.1 (+1 eps) [worker 1] episodes_seen=110 last_return=-111.6 (+1 eps) [worker 0] episodes_seen=120 last_return=-175.6 (+1 eps) [worker 3] episodes_seen=110 last_return=-142.9 (+1 eps) [worker 2] episodes_seen=120 last_return=-118.9 (+1 eps) [worker 1] episodes_seen=120 last_return=-162.6 (+1 eps) [worker 0] episodes_seen=130 last_return=-119.3 (+1 eps) [worker 2] episodes_seen=130 last_return=-113.0 (+1 eps) [worker 3] episodes_seen=120 last_return=-118.6 (+1 eps) [worker 0] episodes_seen=140 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=130 last_return=-141.6 (+1 eps) [worker 2] episodes_seen=140 last_return=-161.2 (+1 eps) [worker 3] episodes_seen=130 last_return=-120.0 (+1 eps) [worker 0] episodes_seen=150 last_return=-164.8 (+1 eps) [worker 2] episodes_seen=150 last_return=-149.9 (+1 eps) [A2C][sync] it= 1501 steps= 60040 (+ 40) avg10=-140.49 loss=640.406 pg=0.000 vf=1067.343 H=0.000 gn=26417.594 [worker 1] episodes_seen=140 last_return=-156.3 (+1 eps) [worker 3] episodes_seen=140 last_return=-119.7 (+1 eps) [worker 2] episodes_seen=160 last_return=-174.1 (+1 eps) [worker 0] episodes_seen=160 last_return=-163.2 (+1 eps) [worker 1] episodes_seen=150 last_return=-109.6 (+1 eps) [worker 3] episodes_seen=150 last_return=-162.6 (+1 eps) [worker 2] episodes_seen=170 last_return=-109.5 (+1 eps) [worker 0] episodes_seen=170 last_return=-165.0 (+1 eps) [worker 3] episodes_seen=160 last_return=-148.0 (+1 eps) [worker 1] 
episodes_seen=160 last_return=-127.8 (+1 eps) [worker 2] episodes_seen=180 last_return=-157.4 (+1 eps) [worker 3] episodes_seen=170 last_return=-148.2 (+1 eps) [worker 0] episodes_seen=180 last_return=-134.5 (+1 eps) [worker 1] episodes_seen=170 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=190 last_return=-185.5 (+1 eps) [worker 3] episodes_seen=180 last_return=-175.4 (+1 eps) [worker 0] episodes_seen=190 last_return=-178.9 (+1 eps) [worker 1] episodes_seen=180 last_return=-108.6 (+1 eps) [worker 2] episodes_seen=200 last_return=-119.1 (+1 eps) [worker 3] episodes_seen=190 last_return=-107.1 (+1 eps) [worker 1] episodes_seen=190 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=200 last_return=-147.1 (+1 eps) [worker 3] episodes_seen=200 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=210 last_return=-128.5 (+1 eps) [worker 0] episodes_seen=210 last_return=-172.9 (+1 eps) [worker 1] episodes_seen=200 last_return=-111.3 (+1 eps) [worker 3] episodes_seen=210 last_return=-158.1 (+1 eps) [worker 2] episodes_seen=220 last_return=-146.7 (+1 eps) [worker 0] episodes_seen=220 last_return=-155.9 (+1 eps) [worker 1] episodes_seen=210 last_return=-141.5 (+1 eps) [worker 2] episodes_seen=230 last_return=-147.4 (+1 eps) [worker 3] episodes_seen=220 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=220 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=230 last_return=-115.3 (+1 eps) [worker 2] episodes_seen=240 last_return=-141.5 (+1 eps) [worker 3] episodes_seen=230 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=230 last_return=-134.0 (+1 eps) [worker 0] episodes_seen=240 last_return=-113.5 (+1 eps) [worker 2] episodes_seen=250 last_return=-153.8 (+1 eps) [worker 3] episodes_seen=240 last_return=-140.1 (+1 eps) [worker 1] episodes_seen=240 last_return=-122.5 (+1 eps) [worker 0] episodes_seen=250 last_return=-178.2 (+1 eps) [worker 2] episodes_seen=260 last_return=-108.0 (+1 eps) [worker 3] episodes_seen=250 last_return=-100.0 (+1 eps) [worker 1] 
episodes_seen=250 last_return=-147.8 (+1 eps) [worker 0] episodes_seen=260 last_return=-108.8 (+1 eps) [worker 3] episodes_seen=260 last_return=-138.4 (+1 eps) [worker 2] episodes_seen=270 last_return=-152.4 (+1 eps) [A2C][sync] it= 3001 steps= 120040 (+ 40) avg10=-141.99 loss=2846.680 pg=0.000 vf=4744.467 H=0.000 gn=44897.918 [worker 1] episodes_seen=260 last_return=-160.4 (+1 eps) [worker 0] episodes_seen=270 last_return=-170.9 (+1 eps) [worker 2] episodes_seen=280 last_return=-139.5 (+1 eps) [worker 3] episodes_seen=270 last_return=-240.3 (+1 eps) [worker 1] episodes_seen=270 last_return=-102.4 (+1 eps) [worker 0] episodes_seen=280 last_return=-130.6 (+1 eps) [worker 2] episodes_seen=290 last_return=-105.5 (+1 eps) [worker 1] episodes_seen=280 last_return=-192.1 (+1 eps) [worker 3] episodes_seen=280 last_return=-139.3 (+1 eps) [worker 0] episodes_seen=290 last_return=-147.5 (+1 eps) [worker 2] episodes_seen=300 last_return=-140.0 (+1 eps) [worker 1] episodes_seen=290 last_return=-145.6 (+1 eps) [worker 3] episodes_seen=290 last_return=-126.2 (+1 eps) [worker 0] episodes_seen=300 last_return=-160.5 (+1 eps) [worker 2] episodes_seen=310 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=300 last_return=-111.2 (+1 eps) [worker 3] episodes_seen=300 last_return=-147.2 (+1 eps) [worker 0] episodes_seen=310 last_return=-129.6 (+1 eps) [worker 2] episodes_seen=320 last_return=-192.3 (+1 eps) [worker 1] episodes_seen=310 last_return=-119.9 (+1 eps) [worker 3] episodes_seen=310 last_return=-149.7 (+1 eps) [worker 1] episodes_seen=320 last_return=-112.1 (+1 eps) [worker 0] episodes_seen=320 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=330 last_return=-122.1 (+1 eps) [worker 3] episodes_seen=320 last_return=-145.3 (+1 eps) [worker 1] episodes_seen=330 last_return=-175.6 (+1 eps) [worker 0] episodes_seen=330 last_return=-168.8 (+1 eps) [worker 2] episodes_seen=340 last_return=-119.5 (+1 eps) [worker 3] episodes_seen=330 last_return=-125.8 (+1 eps) [worker 1] 
episodes_seen=340 last_return=-108.9 (+1 eps) [worker 0] episodes_seen=340 last_return=-126.7 (+1 eps) [worker 2] episodes_seen=350 last_return=-149.2 (+1 eps) [worker 3] episodes_seen=340 last_return=-128.1 (+1 eps) [worker 0] episodes_seen=350 last_return=-168.5 (+1 eps) [worker 1] episodes_seen=350 last_return=-121.8 (+1 eps) [worker 2] episodes_seen=360 last_return=-121.0 (+1 eps) [worker 3] episodes_seen=350 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=360 last_return=-135.8 (+1 eps) [worker 1] episodes_seen=360 last_return=-142.6 (+1 eps) [worker 2] episodes_seen=370 last_return=-107.5 (+1 eps) [worker 0] episodes_seen=370 last_return=-137.4 (+1 eps) [worker 3] episodes_seen=360 last_return=-196.2 (+1 eps) [worker 1] episodes_seen=370 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=380 last_return=-195.8 (+1 eps) [worker 3] episodes_seen=370 last_return=-144.9 (+1 eps) [worker 1] episodes_seen=380 last_return=-179.2 (+1 eps) [worker 0] episodes_seen=380 last_return=-100.0 (+1 eps) [A2C][sync] it= 4501 steps= 180040 (+ 40) avg10=-121.72 loss=13810.735 pg=0.036 vf=23017.840 H=0.326 gn=130251.477 [worker 2] episodes_seen=390 last_return=-121.2 (+1 eps) [worker 1] episodes_seen=390 last_return=-142.0 (+1 eps) [worker 0] episodes_seen=390 last_return=-119.7 (+1 eps) [worker 3] episodes_seen=380 last_return=-140.7 (+1 eps) [worker 2] episodes_seen=400 last_return=-118.6 (+1 eps) [worker 1] episodes_seen=400 last_return=-140.1 (+1 eps) [worker 0] episodes_seen=400 last_return=-153.1 (+1 eps) [worker 3] episodes_seen=390 last_return=-167.9 (+1 eps) [worker 2] episodes_seen=410 last_return=-157.3 (+1 eps) [worker 1] episodes_seen=410 last_return=-170.3 (+1 eps) [worker 3] episodes_seen=400 last_return=-162.2 (+1 eps) [worker 0] episodes_seen=410 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=420 last_return=-151.1 (+1 eps) [worker 3] episodes_seen=410 last_return=-138.8 (+1 eps) [worker 0] episodes_seen=420 last_return=-104.9 (+1 eps) [worker 1] 
episodes_seen=420 last_return=-110.5 (+1 eps) [worker 2] episodes_seen=430 last_return=-123.2 (+1 eps) [worker 3] episodes_seen=420 last_return=-149.5 (+1 eps) [worker 1] episodes_seen=430 last_return=-108.3 (+1 eps) [worker 0] episodes_seen=430 last_return=-158.1 (+1 eps) [worker 2] episodes_seen=440 last_return=-136.4 (+1 eps) [worker 3] episodes_seen=430 last_return=-132.0 (+1 eps) [worker 1] episodes_seen=440 last_return=-148.7 (+1 eps) [worker 0] episodes_seen=440 last_return=-179.1 (+1 eps) [worker 3] episodes_seen=440 last_return=-103.1 (+1 eps) [worker 2] episodes_seen=450 last_return=-110.9 (+1 eps) [worker 1] episodes_seen=450 last_return=-174.8 (+1 eps) [worker 0] episodes_seen=450 last_return=-131.5 (+1 eps) [worker 2] episodes_seen=460 last_return=-111.5 (+1 eps) [worker 3] episodes_seen=450 last_return=-124.4 (+1 eps) [worker 0] episodes_seen=460 last_return=-131.8 (+1 eps) [worker 1] episodes_seen=460 last_return=-156.0 (+1 eps) [worker 2] episodes_seen=470 last_return=-198.6 (+1 eps) [worker 0] episodes_seen=470 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=460 last_return=-139.2 (+1 eps) [worker 1] episodes_seen=470 last_return=-203.3 (+1 eps) [worker 2] episodes_seen=480 last_return=-110.2 (+1 eps) [worker 0] episodes_seen=480 last_return=-95.5 (+1 eps) [worker 1] episodes_seen=480 last_return=-134.8 (+1 eps) [worker 3] episodes_seen=470 last_return=-137.8 (+1 eps) [worker 2] episodes_seen=490 last_return=-137.3 (+1 eps) [worker 3] episodes_seen=480 last_return=-157.9 (+1 eps) [worker 1] episodes_seen=490 last_return=-125.1 (+1 eps) [worker 0] episodes_seen=490 last_return=-134.6 (+1 eps) [A2C][sync] it= 6001 steps= 240040 (+ 40) avg10=-137.35 loss=88538.055 pg=0.018 vf=147563.406 H=0.374 gn=757033.062 [worker 2] episodes_seen=500 last_return=-123.2 (+1 eps) [worker 1] episodes_seen=500 last_return=-173.3 (+1 eps) [worker 3] episodes_seen=490 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=500 last_return=-115.0 (+1 eps) [worker 2] 
episodes_seen=510 last_return=-162.2 (+1 eps) [worker 3] episodes_seen=500 last_return=-119.7 (+1 eps) [worker 0] episodes_seen=510 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=510 last_return=-189.6 (+1 eps) [worker 2] episodes_seen=520 last_return=-159.7 (+1 eps) [worker 0] episodes_seen=520 last_return=-161.6 (+1 eps) [worker 1] episodes_seen=520 last_return=-118.1 (+1 eps) [worker 3] episodes_seen=510 last_return=-121.4 (+1 eps) [worker 2] episodes_seen=530 last_return=-108.0 (+1 eps) [worker 1] episodes_seen=530 last_return=-132.0 (+1 eps) [worker 0] episodes_seen=530 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=520 last_return=-117.8 (+1 eps) [worker 2] episodes_seen=540 last_return=-115.5 (+1 eps) [worker 3] episodes_seen=530 last_return=-106.9 (+1 eps) [worker 0] episodes_seen=540 last_return=-191.1 (+1 eps) [worker 1] episodes_seen=540 last_return=-168.4 (+1 eps) [worker 2] episodes_seen=550 last_return=-136.8 (+1 eps) [worker 3] episodes_seen=540 last_return=-156.8 (+1 eps) [worker 1] episodes_seen=550 last_return=-105.3 (+1 eps) [worker 0] episodes_seen=550 last_return=-135.8 (+1 eps) [worker 2] episodes_seen=560 last_return=-112.9 (+1 eps) [worker 3] episodes_seen=550 last_return=-152.9 (+1 eps) [worker 0] episodes_seen=560 last_return=-150.1 (+1 eps) [worker 1] episodes_seen=560 last_return=-178.8 (+1 eps) [worker 2] episodes_seen=570 last_return=-149.8 (+1 eps) [worker 3] episodes_seen=560 last_return=-137.0 (+1 eps) [worker 1] episodes_seen=570 last_return=-163.6 (+1 eps) [worker 0] episodes_seen=570 last_return=-192.1 (+1 eps) [worker 2] episodes_seen=580 last_return=-113.8 (+1 eps) [worker 3] episodes_seen=570 last_return=-145.0 (+1 eps) [worker 1] episodes_seen=580 last_return=-159.6 (+1 eps) [worker 0] episodes_seen=580 last_return=-135.8 (+1 eps) [worker 2] episodes_seen=590 last_return=-151.0 (+1 eps) [worker 3] episodes_seen=580 last_return=-119.3 (+1 eps) [worker 1] episodes_seen=590 last_return=-113.8 (+1 eps) [worker 0] 
episodes_seen=590 last_return=-188.3 (+1 eps) [worker 2] episodes_seen=600 last_return=-177.4 (+1 eps) [worker 3] episodes_seen=590 last_return=-145.8 (+1 eps) [A2C][sync] it= 7501 steps= 300040 (+ 40) avg10=-159.46 loss=62663644.000 pg=-0.080 vf=104439400.000 H=0.282 gn=19569806.000 [worker 1] episodes_seen=600 last_return=-125.0 (+1 eps) [worker 0] episodes_seen=600 last_return=-118.0 (+1 eps) [worker 1] episodes_seen=610 last_return=-130.9 (+1 eps) [worker 2] episodes_seen=610 last_return=-146.2 (+1 eps) [worker 3] episodes_seen=600 last_return=-195.1 (+1 eps) [worker 0] episodes_seen=610 last_return=-228.6 (+1 eps) [worker 1] episodes_seen=620 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=620 last_return=-173.9 (+1 eps) [worker 3] episodes_seen=610 last_return=-138.9 (+1 eps) [worker 0] episodes_seen=620 last_return=-167.0 (+1 eps) [worker 1] episodes_seen=630 last_return=-113.4 (+1 eps) [worker 2] episodes_seen=630 last_return=-118.9 (+1 eps) [worker 3] episodes_seen=620 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=630 last_return=-142.5 (+1 eps) [worker 2] episodes_seen=640 last_return=-130.1 (+1 eps) [worker 1] episodes_seen=640 last_return=-189.7 (+1 eps) [worker 3] episodes_seen=630 last_return=-132.2 (+1 eps) [worker 0] episodes_seen=640 last_return=-122.8 (+1 eps) [worker 3] episodes_seen=640 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=650 last_return=-155.3 (+1 eps) [worker 2] episodes_seen=650 last_return=-112.3 (+1 eps) [worker 1] episodes_seen=660 last_return=-152.4 (+1 eps) [worker 2] episodes_seen=660 last_return=-156.8 (+1 eps) [worker 3] episodes_seen=650 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=650 last_return=-169.1 (+1 eps) [worker 1] episodes_seen=670 last_return=-114.5 (+1 eps) [worker 3] episodes_seen=660 last_return=-107.2 (+1 eps) [worker 0] episodes_seen=660 last_return=-111.2 (+1 eps) [worker 2] episodes_seen=670 last_return=-181.1 (+1 eps) [worker 1] episodes_seen=680 last_return=-135.1 (+1 eps) 
[worker 3] episodes_seen=670 last_return=-139.7 (+1 eps) [worker 2] episodes_seen=680 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=670 last_return=-119.1 (+1 eps) [worker 1] episodes_seen=690 last_return=-112.9 (+1 eps) [worker 3] episodes_seen=680 last_return=-131.5 (+1 eps) [worker 0] episodes_seen=680 last_return=-134.4 (+1 eps) [worker 1] episodes_seen=700 last_return=-120.8 (+1 eps) [worker 2] episodes_seen=690 last_return=-105.4 (+1 eps) [worker 3] episodes_seen=690 last_return=-118.1 (+1 eps) [worker 0] episodes_seen=690 last_return=-156.9 (+1 eps) [worker 1] episodes_seen=710 last_return=-177.9 (+1 eps) [worker 2] episodes_seen=700 last_return=-112.6 (+1 eps) [worker 3] episodes_seen=700 last_return=-164.7 (+1 eps) [worker 1] episodes_seen=720 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=710 last_return=-112.2 (+1 eps) [A2C][sync] it= 9001 steps= 360040 (+ 40) avg10=-132.67 loss=21311.410 pg=-0.044 vf=35519.090 H=0.192 gn=510878.094 [worker 0] episodes_seen=700 last_return=-123.8 (+1 eps) [worker 3] episodes_seen=710 last_return=-138.3 (+1 eps) [worker 1] episodes_seen=730 last_return=-108.0 (+1 eps) [worker 0] episodes_seen=710 last_return=-118.9 (+1 eps) [worker 2] episodes_seen=720 last_return=-178.1 (+1 eps) [worker 3] episodes_seen=720 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=730 last_return=-179.2 (+1 eps) [worker 1] episodes_seen=740 last_return=-163.6 (+1 eps) [worker 0] episodes_seen=720 last_return=-171.9 (+1 eps) [worker 3] episodes_seen=730 last_return=-155.0 (+1 eps) [worker 1] episodes_seen=750 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=740 last_return=-209.1 (+1 eps) [worker 3] episodes_seen=740 last_return=-194.7 (+1 eps) [worker 0] episodes_seen=730 last_return=-166.5 (+1 eps) [worker 1] episodes_seen=760 last_return=-142.5 (+1 eps) [worker 2] episodes_seen=750 last_return=-146.4 (+1 eps) [worker 0] episodes_seen=740 last_return=-167.6 (+1 eps) [worker 3] episodes_seen=750 last_return=-118.7 (+1 eps) 
[worker 1] episodes_seen=770 last_return=-213.0 (+1 eps) [worker 2] episodes_seen=760 last_return=-125.4 (+1 eps) [worker 3] episodes_seen=760 last_return=-129.9 (+1 eps) [worker 0] episodes_seen=750 last_return=-196.4 (+1 eps) [worker 1] episodes_seen=780 last_return=-231.4 (+1 eps) [worker 2] episodes_seen=770 last_return=-130.1 (+1 eps) [worker 3] episodes_seen=770 last_return=-160.8 (+1 eps) [worker 0] episodes_seen=760 last_return=-116.8 (+1 eps) [worker 2] episodes_seen=780 last_return=-116.3 (+1 eps) [worker 1] episodes_seen=790 last_return=-191.5 (+1 eps) [worker 3] episodes_seen=780 last_return=-171.2 (+1 eps) [worker 0] episodes_seen=770 last_return=-136.1 (+1 eps) [worker 2] episodes_seen=790 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=800 last_return=-115.5 (+1 eps) [worker 3] episodes_seen=790 last_return=-123.6 (+1 eps) [worker 0] episodes_seen=780 last_return=-183.8 (+1 eps) [worker 1] episodes_seen=810 last_return=-169.7 (+1 eps) [worker 3] episodes_seen=800 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=800 last_return=-187.1 (+1 eps) [worker 0] episodes_seen=790 last_return=-147.5 (+1 eps) [worker 1] episodes_seen=820 last_return=-133.6 (+1 eps) [worker 3] episodes_seen=810 last_return=-110.5 (+1 eps) [worker 2] episodes_seen=810 last_return=-110.6 (+1 eps) [worker 0] episodes_seen=800 last_return=-158.4 (+1 eps) [worker 1] episodes_seen=830 last_return=-156.7 (+1 eps) [worker 3] episodes_seen=820 last_return=-109.7 (+1 eps) [worker 2] episodes_seen=820 last_return=-149.0 (+1 eps) [A2C][sync] it=10501 steps= 420040 (+ 40) avg10=-131.22 loss=22883.230 pg=-0.006 vf=38138.727 H=0.051 gn=243266.375 [worker 0] episodes_seen=810 last_return=-114.6 (+1 eps) [worker 3] episodes_seen=830 last_return=-117.2 (+1 eps) [worker 2] episodes_seen=830 last_return=-133.5 (+1 eps) [worker 1] episodes_seen=840 last_return=-109.2 (+1 eps) [worker 0] episodes_seen=820 last_return=-134.0 (+1 eps) [worker 3] episodes_seen=840 last_return=-100.0 (+1 eps) 
[worker 1] episodes_seen=850 last_return=-130.6 (+1 eps) [worker 2] episodes_seen=840 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=830 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=860 last_return=-114.8 (+1 eps) [worker 3] episodes_seen=850 last_return=-143.3 (+1 eps) [worker 2] episodes_seen=850 last_return=-183.8 (+1 eps) [worker 3] episodes_seen=860 last_return=-128.0 (+1 eps) [worker 1] episodes_seen=870 last_return=-184.9 (+1 eps) [worker 0] episodes_seen=840 last_return=-145.2 (+1 eps) [worker 2] episodes_seen=860 last_return=-182.4 (+1 eps) [worker 3] episodes_seen=870 last_return=-137.1 (+1 eps) [worker 1] episodes_seen=880 last_return=-109.8 (+1 eps) [worker 0] episodes_seen=850 last_return=-106.9 (+1 eps) [worker 2] episodes_seen=870 last_return=-176.2 (+1 eps) [worker 3] episodes_seen=880 last_return=-149.2 (+1 eps) [worker 0] episodes_seen=860 last_return=-166.7 (+1 eps) [worker 1] episodes_seen=890 last_return=-149.6 (+1 eps) [worker 2] episodes_seen=880 last_return=-158.6 (+1 eps) [worker 3] episodes_seen=890 last_return=-129.9 (+1 eps) [worker 1] episodes_seen=900 last_return=-106.5 (+1 eps) [worker 0] episodes_seen=870 last_return=-115.7 (+1 eps) [worker 2] episodes_seen=890 last_return=-155.8 (+1 eps) [worker 3] episodes_seen=900 last_return=-111.4 (+1 eps) [worker 2] episodes_seen=900 last_return=-146.4 (+1 eps) [worker 1] episodes_seen=910 last_return=-120.7 (+1 eps) [worker 0] episodes_seen=880 last_return=-167.4 (+1 eps) [worker 3] episodes_seen=910 last_return=-69.3 (+1 eps) [worker 2] episodes_seen=910 last_return=-130.2 (+1 eps) [worker 1] episodes_seen=920 last_return=-182.3 (+1 eps) [worker 0] episodes_seen=890 last_return=-169.8 (+1 eps) [worker 3] episodes_seen=920 last_return=-165.5 (+1 eps) [worker 1] episodes_seen=930 last_return=-120.9 (+1 eps) [worker 2] episodes_seen=920 last_return=-161.5 (+1 eps) [worker 0] episodes_seen=900 last_return=-125.9 (+1 eps) [worker 3] episodes_seen=930 last_return=-110.9 (+1 eps) 
[worker 1] episodes_seen=940 last_return=-108.0 (+1 eps) [worker 2] episodes_seen=930 last_return=-1.6 (+1 eps) [worker 0] episodes_seen=910 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=940 last_return=43.9 (+1 eps) [worker 1] episodes_seen=950 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=920 last_return=-142.8 (+1 eps) [worker 2] episodes_seen=940 last_return=-107.4 (+1 eps) [worker 3] episodes_seen=950 last_return=-167.1 (+1 eps) [A2C][sync] it=12001 steps= 480040 (+ 40) avg10=-178.87 loss=158371.672 pg=0.011 vf=263952.750 H=0.061 gn=385584.312 [worker 1] episodes_seen=960 last_return=-142.2 (+1 eps) [worker 0] episodes_seen=930 last_return=-121.0 (+1 eps) [worker 2] episodes_seen=950 last_return=-151.3 (+1 eps) [worker 3] episodes_seen=960 last_return=-185.2 (+1 eps) [worker 1] episodes_seen=970 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=940 last_return=-163.5 (+1 eps) [worker 3] episodes_seen=970 last_return=-139.5 (+1 eps) [worker 1] episodes_seen=980 last_return=-114.7 (+1 eps) [worker 2] episodes_seen=960 last_return=-135.5 (+1 eps) [worker 0] episodes_seen=950 last_return=-153.4 (+1 eps) [worker 1] episodes_seen=990 last_return=-230.1 (+1 eps) [worker 3] episodes_seen=980 last_return=-160.1 (+1 eps) [worker 0] episodes_seen=960 last_return=-181.2 (+1 eps) [worker 2] episodes_seen=970 last_return=-117.7 (+1 eps) [worker 1] episodes_seen=1000 last_return=-135.0 (+1 eps) [worker 3] episodes_seen=990 last_return=-191.1 (+1 eps) [worker 2] episodes_seen=980 last_return=-169.0 (+1 eps) [worker 0] episodes_seen=970 last_return=-207.4 (+1 eps) [worker 1] episodes_seen=1010 last_return=-155.3 (+1 eps) [worker 3] episodes_seen=1000 last_return=-197.5 (+1 eps) [worker 0] episodes_seen=980 last_return=-111.0 (+1 eps) [worker 2] episodes_seen=990 last_return=-199.5 (+1 eps) [worker 1] episodes_seen=1020 last_return=-112.8 (+1 eps) [worker 3] episodes_seen=1010 last_return=-119.2 (+1 eps) [worker 0] episodes_seen=990 last_return=-113.7 (+1 eps) 
[worker 2] episodes_seen=1000 last_return=-142.7 (+1 eps) [worker 1] episodes_seen=1030 last_return=-287.8 (+1 eps) [worker 3] episodes_seen=1020 last_return=-150.8 (+1 eps) [worker 0] episodes_seen=1000 last_return=-212.4 (+1 eps) [worker 2] episodes_seen=1010 last_return=-192.4 (+1 eps) [worker 1] episodes_seen=1040 last_return=-113.2 (+1 eps) [worker 3] episodes_seen=1030 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1010 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1020 last_return=-194.5 (+1 eps) [worker 1] episodes_seen=1050 last_return=-114.8 (+1 eps) [worker 3] episodes_seen=1040 last_return=-191.3 (+1 eps) [worker 0] episodes_seen=1020 last_return=-219.0 (+1 eps) [worker 2] episodes_seen=1030 last_return=-228.2 (+1 eps) [worker 1] episodes_seen=1060 last_return=-222.2 (+1 eps) [worker 3] episodes_seen=1050 last_return=-102.1 (+1 eps) [worker 0] episodes_seen=1030 last_return=-124.2 (+1 eps) [worker 2] episodes_seen=1040 last_return=-119.7 (+1 eps) [worker 1] episodes_seen=1070 last_return=-133.1 (+1 eps) [worker 3] episodes_seen=1060 last_return=-137.2 (+1 eps) [worker 0] episodes_seen=1040 last_return=-160.2 (+1 eps) [worker 2] episodes_seen=1050 last_return=-125.7 (+1 eps) [worker 1] episodes_seen=1080 last_return=-101.5 (+1 eps) [worker 3] episodes_seen=1070 last_return=-206.7 (+1 eps) [worker 0] episodes_seen=1050 last_return=-106.6 (+1 eps) [worker 2] episodes_seen=1060 last_return=-145.5 (+1 eps) [worker 1] episodes_seen=1090 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=1080 last_return=-116.6 (+1 eps) [worker 2] episodes_seen=1070 last_return=-107.2 (+1 eps) [worker 0] episodes_seen=1060 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1100 last_return=-124.4 (+1 eps) [worker 3] episodes_seen=1090 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1080 last_return=-135.5 (+1 eps) [worker 0] episodes_seen=1070 last_return=-132.9 (+1 eps) [worker 3] episodes_seen=1100 last_return=-184.5 (+1 eps) [worker 1] 
episodes_seen=1110 last_return=-122.4 (+1 eps) [worker 2] episodes_seen=1090 last_return=-20.2 (+1 eps) [worker 0] episodes_seen=1080 last_return=-141.3 (+1 eps) [worker 1] episodes_seen=1120 last_return=-219.5 (+1 eps) [worker 3] episodes_seen=1110 last_return=-126.7 (+1 eps) [A2C][sync] it=13501 steps= 540040 (+ 40) avg10=-122.06 loss=292868.219 pg=0.000 vf=488113.688 H=0.042 gn=289394.719 [worker 2] episodes_seen=1100 last_return=-167.0 (+1 eps) [worker 0] episodes_seen=1090 last_return=-149.2 (+1 eps) [worker 3] episodes_seen=1120 last_return=-110.1 (+1 eps) [worker 1] episodes_seen=1130 last_return=-128.9 (+1 eps) [worker 2] episodes_seen=1110 last_return=-196.4 (+1 eps) [worker 0] episodes_seen=1100 last_return=-126.4 (+1 eps) [worker 3] episodes_seen=1130 last_return=-122.6 (+1 eps) [worker 1] episodes_seen=1140 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1120 last_return=-146.6 (+1 eps) [worker 0] episodes_seen=1110 last_return=-116.2 (+1 eps) [worker 3] episodes_seen=1140 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1150 last_return=-169.0 (+1 eps) [worker 2] episodes_seen=1130 last_return=-142.1 (+1 eps) [worker 3] episodes_seen=1150 last_return=-114.2 (+1 eps) [worker 0] episodes_seen=1120 last_return=-107.7 (+1 eps) [worker 1] episodes_seen=1160 last_return=-112.3 (+1 eps) [worker 2] episodes_seen=1140 last_return=-136.7 (+1 eps) [worker 3] episodes_seen=1160 last_return=-159.6 (+1 eps) [worker 0] episodes_seen=1130 last_return=-138.9 (+1 eps) [worker 1] episodes_seen=1170 last_return=-138.3 (+1 eps) [worker 2] episodes_seen=1150 last_return=-128.5 (+1 eps) [worker 0] episodes_seen=1140 last_return=-66.4 (+1 eps) [worker 3] episodes_seen=1170 last_return=-132.5 (+1 eps) [worker 1] episodes_seen=1180 last_return=-110.9 (+1 eps) [worker 2] episodes_seen=1160 last_return=-118.8 (+1 eps) [worker 0] episodes_seen=1150 last_return=-142.5 (+1 eps) [worker 3] episodes_seen=1180 last_return=-102.5 (+1 eps) [worker 1] episodes_seen=1190 
last_return=-153.5 (+1 eps) [worker 2] episodes_seen=1170 last_return=-150.0 (+1 eps) [worker 0] episodes_seen=1160 last_return=-129.5 (+1 eps) [worker 3] episodes_seen=1190 last_return=-124.1 (+1 eps) [worker 2] episodes_seen=1180 last_return=-118.0 (+1 eps) [worker 1] episodes_seen=1200 last_return=-146.0 (+1 eps) [worker 0] episodes_seen=1170 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=1200 last_return=-168.5 (+1 eps) [worker 2] episodes_seen=1190 last_return=-190.8 (+1 eps) [worker 1] episodes_seen=1210 last_return=-168.4 (+1 eps) [worker 0] episodes_seen=1180 last_return=-142.6 (+1 eps) [worker 3] episodes_seen=1210 last_return=-130.2 (+1 eps) [worker 2] episodes_seen=1200 last_return=-142.3 (+1 eps) [worker 1] episodes_seen=1220 last_return=-110.2 (+1 eps) [worker 0] episodes_seen=1190 last_return=-189.8 (+1 eps) [worker 3] episodes_seen=1220 last_return=-133.9 (+1 eps) [worker 2] episodes_seen=1210 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1230 last_return=-164.3 (+1 eps) [worker 0] episodes_seen=1200 last_return=-177.5 (+1 eps) [A2C][sync] it=15001 steps= 600040 (+ 40) avg10=-125.21 loss=104355.383 pg=-0.274 vf=173926.094 H=0.726 gn=331836.906 [worker 3] episodes_seen=1230 last_return=-145.7 (+1 eps) [worker 2] episodes_seen=1220 last_return=-149.9 (+1 eps) [worker 1] episodes_seen=1240 last_return=-133.2 (+1 eps) [worker 0] episodes_seen=1210 last_return=-167.1 (+1 eps) [worker 3] episodes_seen=1240 last_return=-122.6 (+1 eps) [worker 1] episodes_seen=1250 last_return=-158.6 (+1 eps) [worker 2] episodes_seen=1230 last_return=-144.4 (+1 eps) [worker 0] episodes_seen=1220 last_return=-108.6 (+1 eps) [worker 3] episodes_seen=1250 last_return=-132.1 (+1 eps) [worker 2] episodes_seen=1240 last_return=-165.1 (+1 eps) [worker 1] episodes_seen=1260 last_return=-113.0 (+1 eps) [worker 3] episodes_seen=1260 last_return=-175.6 (+1 eps) [worker 0] episodes_seen=1230 last_return=-90.0 (+1 eps) [worker 1] episodes_seen=1270 last_return=-79.6 (+1 eps) 
[worker 2] episodes_seen=1250 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1280 last_return=-109.5 (+1 eps) [worker 0] episodes_seen=1240 last_return=-148.7 (+1 eps) [worker 3] episodes_seen=1270 last_return=-131.3 (+1 eps) [worker 2] episodes_seen=1260 last_return=-107.4 (+1 eps) [worker 1] episodes_seen=1290 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1250 last_return=-111.2 (+1 eps) [worker 2] episodes_seen=1270 last_return=-102.1 (+1 eps) [worker 3] episodes_seen=1280 last_return=-102.6 (+1 eps) [worker 1] episodes_seen=1300 last_return=-102.9 (+1 eps) [worker 0] episodes_seen=1260 last_return=-112.9 (+1 eps) [worker 2] episodes_seen=1280 last_return=-118.1 (+1 eps) [worker 3] episodes_seen=1290 last_return=-138.7 (+1 eps) [worker 1] episodes_seen=1310 last_return=-102.2 (+1 eps) [worker 2] episodes_seen=1290 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1270 last_return=-117.7 (+1 eps) [worker 3] episodes_seen=1300 last_return=-127.9 (+1 eps) [worker 1] episodes_seen=1320 last_return=-114.0 (+1 eps) [worker 0] episodes_seen=1280 last_return=-129.0 (+1 eps) [worker 2] episodes_seen=1300 last_return=-50.7 (+1 eps) [worker 3] episodes_seen=1310 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1330 last_return=-121.6 (+1 eps) [worker 0] episodes_seen=1290 last_return=-187.2 (+1 eps) [worker 2] episodes_seen=1310 last_return=-49.3 (+1 eps) [worker 3] episodes_seen=1320 last_return=-121.8 (+1 eps) [worker 1] episodes_seen=1340 last_return=-114.2 (+1 eps) [worker 0] episodes_seen=1300 last_return=-77.6 (+1 eps) [worker 2] episodes_seen=1320 last_return=-108.6 (+1 eps) [worker 3] episodes_seen=1330 last_return=-109.8 (+1 eps) [worker 1] episodes_seen=1350 last_return=-96.2 (+1 eps) [worker 0] episodes_seen=1310 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1330 last_return=-104.3 (+1 eps) [worker 3] episodes_seen=1340 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1360 last_return=-71.5 (+1 eps) [worker 0] episodes_seen=1320 
last_return=-114.6 (+1 eps) [A2C][sync] it=16501 steps= 660040 (+ 40) avg10= -93.45 loss=54732.574 pg=-0.186 vf=91221.289 H=1.038 gn=79897.570 [worker 2] episodes_seen=1340 last_return=-90.6 (+1 eps) [worker 3] episodes_seen=1350 last_return=-119.1 (+1 eps) [worker 1] episodes_seen=1370 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1330 last_return=-95.7 (+1 eps) [worker 2] episodes_seen=1350 last_return=-95.6 (+1 eps) [worker 3] episodes_seen=1360 last_return=-91.7 (+1 eps) [worker 1] episodes_seen=1380 last_return=-94.0 (+1 eps) [worker 0] episodes_seen=1340 last_return=-97.4 (+1 eps) [worker 2] episodes_seen=1360 last_return=-100.7 (+1 eps) [worker 3] episodes_seen=1370 last_return=-92.9 (+1 eps) [worker 1] episodes_seen=1390 last_return=-91.9 (+1 eps) [worker 0] episodes_seen=1350 last_return=-97.6 (+1 eps) [worker 2] episodes_seen=1370 last_return=-96.6 (+1 eps) [worker 3] episodes_seen=1380 last_return=-27.0 (+1 eps) [worker 1] episodes_seen=1400 last_return=-88.8 (+1 eps) [worker 0] episodes_seen=1360 last_return=-94.1 (+1 eps) [worker 2] episodes_seen=1380 last_return=57.4 (+1 eps) [worker 3] episodes_seen=1390 last_return=-115.5 (+1 eps) [worker 1] episodes_seen=1410 last_return=-87.1 (+1 eps) [worker 0] episodes_seen=1370 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1390 last_return=-92.3 (+1 eps) [worker 3] episodes_seen=1400 last_return=-86.7 (+1 eps) [worker 1] episodes_seen=1420 last_return=-100.1 (+1 eps) [worker 0] episodes_seen=1380 last_return=-82.3 (+1 eps) [worker 2] episodes_seen=1400 last_return=-83.1 (+1 eps) [worker 3] episodes_seen=1410 last_return=-81.4 (+1 eps) [worker 1] episodes_seen=1430 last_return=-83.1 (+1 eps) [worker 0] episodes_seen=1390 last_return=-81.0 (+1 eps) [worker 2] episodes_seen=1410 last_return=-87.5 (+1 eps) [worker 3] episodes_seen=1420 last_return=-66.9 (+1 eps) [worker 1] episodes_seen=1440 last_return=-73.3 (+1 eps) [worker 2] episodes_seen=1420 last_return=-93.1 (+1 eps) [worker 1] 
episodes_seen=1450 last_return=-80.9 (+1 eps) [worker 3] episodes_seen=1430 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1400 last_return=-7.8 (+1 eps) [worker 2] episodes_seen=1430 last_return=-55.2 (+1 eps) [worker 1] episodes_seen=1460 last_return=-89.1 (+1 eps) [worker 3] episodes_seen=1440 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1440 last_return=-84.1 (+1 eps) [worker 0] episodes_seen=1410 last_return=-148.5 (+1 eps) [worker 1] episodes_seen=1470 last_return=-119.5 (+1 eps) [worker 3] episodes_seen=1450 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1450 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1420 last_return=-110.6 (+1 eps) [worker 1] episodes_seen=1480 last_return=-84.5 (+1 eps) [worker 3] episodes_seen=1460 last_return=-86.7 (+1 eps) [worker 2] episodes_seen=1460 last_return=-89.9 (+1 eps) [worker 0] episodes_seen=1430 last_return=-63.1 (+1 eps) [worker 1] episodes_seen=1490 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=1470 last_return=-86.9 (+1 eps) [worker 2] episodes_seen=1470 last_return=-106.0 (+1 eps) [worker 0] episodes_seen=1440 last_return=-71.3 (+1 eps) [worker 1] episodes_seen=1500 last_return=-60.2 (+1 eps) [worker 3] episodes_seen=1480 last_return=-86.2 (+1 eps) [worker 2] episodes_seen=1480 last_return=-89.4 (+1 eps) [worker 0] episodes_seen=1450 last_return=-104.0 (+1 eps) [worker 1] episodes_seen=1510 last_return=-75.5 (+1 eps) [worker 2] episodes_seen=1490 last_return=-94.3 (+1 eps) [worker 0] episodes_seen=1460 last_return=-83.9 (+1 eps) [worker 1] episodes_seen=1520 last_return=-79.3 (+1 eps) [worker 3] episodes_seen=1490 last_return=-88.7 (+1 eps) [worker 2] episodes_seen=1500 last_return=-86.8 (+1 eps) [worker 1] episodes_seen=1530 last_return=-80.6 (+1 eps) [worker 0] episodes_seen=1470 last_return=-57.4 (+1 eps) [A2C][sync] it=18001 steps= 720040 (+ 40) avg10= -85.00 loss=104.229 pg=-0.007 vf=173.750 H=0.922 gn=345.904 [worker 3] episodes_seen=1500 last_return=-89.8 (+1 eps) [worker 2] 
episodes_seen=1510 last_return=-63.6 (+1 eps) [worker 1] episodes_seen=1540 last_return=-108.9 (+1 eps) [worker 0] episodes_seen=1480 last_return=-85.7 (+1 eps) [worker 3] episodes_seen=1510 last_return=-74.4 (+1 eps) [worker 2] episodes_seen=1520 last_return=-79.5 (+1 eps) [worker 1] episodes_seen=1550 last_return=-70.4 (+1 eps) [worker 0] episodes_seen=1490 last_return=-105.4 (+1 eps) [worker 3] episodes_seen=1520 last_return=-87.0 (+1 eps) [worker 2] episodes_seen=1530 last_return=-86.2 (+1 eps) [worker 1] episodes_seen=1560 last_return=-89.8 (+1 eps) [worker 0] episodes_seen=1500 last_return=-72.0 (+1 eps) [worker 3] episodes_seen=1530 last_return=-90.3 (+1 eps) [worker 2] episodes_seen=1540 last_return=-83.8 (+1 eps) [worker 1] episodes_seen=1570 last_return=-110.8 (+1 eps) [worker 0] episodes_seen=1510 last_return=-96.5 (+1 eps) [worker 3] episodes_seen=1540 last_return=-111.3 (+1 eps) [worker 2] episodes_seen=1550 last_return=-95.5 (+1 eps) [worker 1] episodes_seen=1580 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1520 last_return=-83.8 (+1 eps) [worker 3] episodes_seen=1550 last_return=-82.3 (+1 eps) [worker 2] episodes_seen=1560 last_return=-88.3 (+1 eps) [worker 1] episodes_seen=1590 last_return=-0.1 (+1 eps) [worker 0] episodes_seen=1530 last_return=-71.3 (+1 eps) [worker 3] episodes_seen=1560 last_return=-80.5 (+1 eps) [worker 1] episodes_seen=1600 last_return=-96.0 (+1 eps) [worker 2] episodes_seen=1570 last_return=-100.4 (+1 eps) [worker 0] episodes_seen=1540 last_return=-84.8 (+1 eps) [worker 3] episodes_seen=1570 last_return=-70.5 (+1 eps) [worker 2] episodes_seen=1580 last_return=-79.6 (+1 eps) [worker 1] episodes_seen=1610 last_return=-57.1 (+1 eps) [worker 0] episodes_seen=1550 last_return=-80.5 (+1 eps) [worker 3] episodes_seen=1580 last_return=-96.6 (+1 eps) [worker 2] episodes_seen=1590 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1620 last_return=-57.8 (+1 eps) [worker 0] episodes_seen=1560 last_return=-85.3 (+1 eps) [worker 
3] episodes_seen=1590 last_return=-77.3 (+1 eps) [worker 2] episodes_seen=1600 last_return=-88.1 (+1 eps) [worker 1] episodes_seen=1630 last_return=-87.7 (+1 eps) [worker 0] episodes_seen=1570 last_return=-66.1 (+1 eps) [worker 3] episodes_seen=1600 last_return=-82.7 (+1 eps) [worker 2] episodes_seen=1610 last_return=-85.6 (+1 eps) [worker 1] episodes_seen=1640 last_return=-93.6 (+1 eps) [worker 0] episodes_seen=1580 last_return=-90.4 (+1 eps) [worker 3] episodes_seen=1610 last_return=-97.4 (+1 eps) [worker 2] episodes_seen=1620 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1650 last_return=-67.7 (+1 eps) [worker 0] episodes_seen=1590 last_return=-76.4 (+1 eps) [worker 3] episodes_seen=1620 last_return=-98.8 (+1 eps) [worker 2] episodes_seen=1630 last_return=-84.6 (+1 eps) [worker 1] episodes_seen=1660 last_return=-91.1 (+1 eps) [worker 0] episodes_seen=1600 last_return=-94.5 (+1 eps) [worker 3] episodes_seen=1630 last_return=-76.2 (+1 eps) [worker 2] episodes_seen=1640 last_return=-100.2 (+1 eps) [worker 0] episodes_seen=1610 last_return=-107.5 (+1 eps) [worker 1] episodes_seen=1670 last_return=-70.3 (+1 eps) [worker 3] episodes_seen=1640 last_return=-73.8 (+1 eps) [worker 2] episodes_seen=1650 last_return=-108.7 (+1 eps) [worker 0] episodes_seen=1620 last_return=-90.1 (+1 eps) [worker 1] episodes_seen=1680 last_return=-81.6 (+1 eps) [worker 3] episodes_seen=1650 last_return=-85.9 (+1 eps) [worker 2] episodes_seen=1660 last_return=-87.4 (+1 eps) [worker 1] episodes_seen=1690 last_return=-94.9 (+1 eps) [worker 0] episodes_seen=1630 last_return=-95.2 (+1 eps) [worker 3] episodes_seen=1660 last_return=-92.2 (+1 eps) [A2C][sync] it=19501 steps= 780040 (+ 40) avg10= -84.41 loss=40.979 pg=-0.084 vf=68.465 H=1.036 gn=107.824 [worker 1] episodes_seen=1700 last_return=-92.6 (+1 eps) [worker 0] episodes_seen=1640 last_return=-59.8 (+1 eps) [worker 3] episodes_seen=1670 last_return=-79.7 (+1 eps) [worker 2] episodes_seen=1670 last_return=-94.4 (+1 eps) [worker 1] 
episodes_seen=1710 last_return=-79.4 (+1 eps) [worker 0] episodes_seen=1650 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=1680 last_return=-91.1 (+1 eps) [worker 2] episodes_seen=1680 last_return=-74.6 (+1 eps) [worker 1] episodes_seen=1720 last_return=-72.1 (+1 eps) [worker 0] episodes_seen=1660 last_return=-75.6 (+1 eps) [worker 3] episodes_seen=1690 last_return=-76.3 (+1 eps) [worker 2] episodes_seen=1690 last_return=-88.3 (+1 eps) [worker 1] episodes_seen=1730 last_return=-78.0 (+1 eps) [worker 0] episodes_seen=1670 last_return=-100.5 (+1 eps) [worker 3] episodes_seen=1700 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1700 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1740 last_return=-97.6 (+1 eps) [worker 0] episodes_seen=1680 last_return=-82.5 (+1 eps) [worker 3] episodes_seen=1710 last_return=-65.4 (+1 eps) [worker 2] episodes_seen=1710 last_return=-72.4 (+1 eps) [worker 1] episodes_seen=1750 last_return=-92.5 (+1 eps) [worker 0] episodes_seen=1690 last_return=-59.8 (+1 eps) [worker 3] episodes_seen=1720 last_return=-66.2 (+1 eps) [worker 1] episodes_seen=1760 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1720 last_return=-86.7 (+1 eps) [worker 0] episodes_seen=1700 last_return=-97.0 (+1 eps) [worker 3] episodes_seen=1730 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1770 last_return=-87.9 (+1 eps) [worker 2] episodes_seen=1730 last_return=-61.0 (+1 eps) [worker 0] episodes_seen=1710 last_return=-75.9 (+1 eps) [worker 3] episodes_seen=1740 last_return=-105.4 (+1 eps) [worker 1] episodes_seen=1780 last_return=-72.1 (+1 eps) [worker 0] episodes_seen=1720 last_return=-81.3 (+1 eps) [worker 2] episodes_seen=1740 last_return=-72.5 (+1 eps) [worker 3] episodes_seen=1750 last_return=-58.7 (+1 eps) [worker 1] episodes_seen=1790 last_return=-95.7 (+1 eps) [worker 0] episodes_seen=1730 last_return=-90.2 (+1 eps) [worker 2] episodes_seen=1750 last_return=-79.9 (+1 eps) [worker 3] episodes_seen=1760 last_return=-82.1 (+1 eps) [worker 
1] episodes_seen=1800 last_return=-76.6 (+1 eps) [worker 0] episodes_seen=1740 last_return=-115.7 (+1 eps) [worker 2] episodes_seen=1760 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=1770 last_return=-65.1 (+1 eps) [worker 1] episodes_seen=1810 last_return=-73.0 (+1 eps) [worker 0] episodes_seen=1750 last_return=-72.1 (+1 eps) [worker 2] episodes_seen=1770 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=1780 last_return=-77.5 (+1 eps) [worker 1] episodes_seen=1820 last_return=-145.3 (+1 eps) [worker 0] episodes_seen=1760 last_return=-139.6 (+1 eps) [worker 2] episodes_seen=1780 last_return=-106.1 (+1 eps) [worker 3] episodes_seen=1790 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1830 last_return=-110.7 (+1 eps) [worker 0] episodes_seen=1770 last_return=-152.4 (+1 eps) [worker 2] episodes_seen=1790 last_return=-106.4 (+1 eps) [worker 3] episodes_seen=1800 last_return=-157.9 (+1 eps) [worker 1] episodes_seen=1840 last_return=-169.1 (+1 eps) [worker 0] episodes_seen=1780 last_return=-164.6 (+1 eps) [worker 2] episodes_seen=1800 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=1810 last_return=-69.6 (+1 eps) [A2C][sync] it=21001 steps= 840040 (+ 40) avg10= -98.90 loss=1106.009 pg=0.052 vf=1843.284 H=0.909 gn=1718.233 [worker 1] episodes_seen=1850 last_return=-79.2 (+1 eps) [worker 2] episodes_seen=1810 last_return=-64.0 (+1 eps) [worker 3] episodes_seen=1820 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1860 last_return=-78.9 (+1 eps) [worker 0] episodes_seen=1790 last_return=-67.1 (+1 eps) [worker 3] episodes_seen=1830 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1820 last_return=-82.7 (+1 eps) [worker 1] episodes_seen=1870 last_return=-84.8 (+1 eps) [worker 0] episodes_seen=1800 last_return=-75.2 (+1 eps) [worker 3] episodes_seen=1840 last_return=-83.3 (+1 eps) [worker 2] episodes_seen=1830 last_return=-77.0 (+1 eps) [worker 1] episodes_seen=1880 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1810 last_return=-93.6 (+1 
eps) [worker 3] episodes_seen=1850 last_return=-83.2 (+1 eps) [worker 2] episodes_seen=1840 last_return=-88.3 (+1 eps) [worker 1] episodes_seen=1890 last_return=-64.7 (+1 eps) [worker 0] episodes_seen=1820 last_return=-77.7 (+1 eps) [worker 3] episodes_seen=1860 last_return=-76.5 (+1 eps) [worker 2] episodes_seen=1850 last_return=-85.0 (+1 eps) [worker 1] episodes_seen=1900 last_return=-76.1 (+1 eps) [worker 0] episodes_seen=1830 last_return=-71.5 (+1 eps) [worker 3] episodes_seen=1870 last_return=-71.1 (+1 eps) [worker 1] episodes_seen=1910 last_return=-59.4 (+1 eps) [worker 2] episodes_seen=1860 last_return=-85.9 (+1 eps) [worker 0] episodes_seen=1840 last_return=-70.9 (+1 eps) [worker 3] episodes_seen=1880 last_return=-85.2 (+1 eps) [worker 1] episodes_seen=1920 last_return=-73.2 (+1 eps) [worker 0] episodes_seen=1850 last_return=-102.4 (+1 eps) [worker 2] episodes_seen=1870 last_return=-102.6 (+1 eps) [worker 3] episodes_seen=1890 last_return=-74.3 (+1 eps) [worker 1] episodes_seen=1930 last_return=-76.7 (+1 eps) [worker 0] episodes_seen=1860 last_return=-138.5 (+1 eps) [worker 2] episodes_seen=1880 last_return=-93.8 (+1 eps) [worker 3] episodes_seen=1900 last_return=-80.3 (+1 eps) [worker 0] episodes_seen=1870 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1940 last_return=-102.9 (+1 eps) [worker 3] episodes_seen=1910 last_return=-155.4 (+1 eps) [worker 2] episodes_seen=1890 last_return=-141.0 (+1 eps) [worker 0] episodes_seen=1880 last_return=-167.0 (+1 eps) [worker 1] episodes_seen=1950 last_return=-156.0 (+1 eps) [worker 3] episodes_seen=1920 last_return=-165.6 (+1 eps) [worker 2] episodes_seen=1900 last_return=-207.8 (+1 eps) [worker 1] episodes_seen=1960 last_return=-174.7 (+1 eps) [worker 3] episodes_seen=1930 last_return=-121.8 (+1 eps) [worker 0] episodes_seen=1890 last_return=-166.4 (+1 eps) [worker 2] episodes_seen=1910 last_return=-183.9 (+1 eps) [worker 3] episodes_seen=1940 last_return=-169.9 (+1 eps) [worker 1] episodes_seen=1970 
last_return=-154.1 (+1 eps) [worker 0] episodes_seen=1900 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1920 last_return=-63.4 (+1 eps) [worker 1] episodes_seen=1980 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=1950 last_return=-76.8 (+1 eps) [worker 0] episodes_seen=1910 last_return=-105.8 (+1 eps) [worker 2] episodes_seen=1930 last_return=-149.9 (+1 eps) [worker 1] episodes_seen=1990 last_return=-76.9 (+1 eps) [worker 0] episodes_seen=1920 last_return=-73.5 (+1 eps) [worker 2] episodes_seen=1940 last_return=-80.8 (+1 eps) [worker 3] episodes_seen=1960 last_return=-100.0 (+1 eps) [A2C][sync] it=22500 steps= 900000 (+ 40) avg10= -81.46 loss=19.554 pg=-0.006 vf=32.621 H=0.846 gn=403.646 [Checkpoint] Saved self-describing checkpoint → part2_artifacts/checkpoints/a2c_run13_seed1227.pth [A2C][sync] done: steps=900000 time=1177.1s avg10=-81.46
[Run run13_seed1227] checkpoint: part2_artifacts/checkpoints/a2c_run13_seed1227.pth [Run run13_seed1227] training plot (tail 500): part2_artifacts/train_curve_run13_seed1227.png [Run run13_seed1227] training plot (full): part2_artifacts/train_curve_full_run13_seed1227.png [Run run13_seed1227] per-worker plot: part2_artifacts/train_curve_workers_run13_seed1227.png [Run run13_seed1227] workers-average plot: part2_artifacts/train_curve_workers_avg_run13_seed1227.png
[Eval run13_seed1227] mean=-46.06 std=52.96 min=-146.62 max=26.21 [Eval run13_seed1227] CSV: part2_artifacts/eval10_run13_seed1227.csv [Eval run13_seed1227] plot: part2_artifacts/eval10_run13_seed1227.png [Best] ep=5 return=26.21 seed=1232
/usr/local/lib/python3.12/dist-packages/gymnasium/wrappers/rendering.py:293: UserWarning: WARN: Overwriting existing videos at /content/part2_artifacts/videos/run13_seed1227 folder (try specifying a different `video_folder` for the `RecordVideo` wrapper if this is not desired)
logger.warn(
[Video run13_seed1227] episode return=26.21 [Video run13_seed1227] saved under: part2_artifacts/videos run13_seed1227 | mean=-46.1±53.0 | best_ep=5, best_ret=26.2
Run#14
# Run 14: two workers with T=5. The run-14 log reports "workers=2, T=5" and
# 10 env steps per sync iteration, i.e. n_workers * T.
run_id = f"run14_seed{SEED}"

# All training knobs for this run gathered in one mapping so they can be
# compared against other runs at a glance; passed to train_once as keywords.
run14_hparams = {
    "n_workers": 2,
    "total_env_steps": 700_000,  # training budget ("target_steps" in the log)
    "T": 5,
    "gamma": 0.99,
    "entropy_coef": 0.015,
    "value_coef": 0.60,
    "max_grad_norm": 0.5,
    "lr": 2e-4,
    "log_every": 50_000,  # the log shows a sync summary at steps=50010
}
model, logs, paths = train_once(run_id=run_id, **run14_hparams)

# Evaluate the checkpoint written by training; the second return value is unused here.
metrics, _ = evaluate_10(run_id, paths.ckpt_path)

# Presumably re-plays the best evaluation episode for the video (the logged
# "[Best]" and "[Video]" lines of the previous run suggest so) — confirm in
# record_best_from_eval.
video_dir, best_idx, best_ret = record_best_from_eval(run_id, paths.ckpt_path)

# One-line summary of the run.
print(f"{run_id} | mean={metrics['mean']:.1f}±{metrics['std']:.1f} | best_ep={best_idx}, best_ret={best_ret:.1f}")
[Run run14_seed1227] starting training… [A2C][sync] start: workers=2, T=5, target_steps=700000, mp=fork [A2C][sync] it= 1 steps= 10 (+ 10) avg10= nan loss=1.358 pg=0.000 vf=2.297 H=1.386 gn=1.308 [worker 0] episodes_seen=10 last_return=-132.5 (+1 eps) [worker 1] episodes_seen=10 last_return=-120.7 (+1 eps) [worker 0] episodes_seen=20 last_return=-138.2 (+1 eps) [worker 1] episodes_seen=20 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=30 last_return=-110.3 (+1 eps) [worker 1] episodes_seen=30 last_return=-113.4 (+1 eps) [worker 0] episodes_seen=40 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=40 last_return=-149.0 (+1 eps) [worker 1] episodes_seen=50 last_return=-133.5 (+1 eps) [worker 0] episodes_seen=50 last_return=-128.1 (+1 eps) [worker 0] episodes_seen=60 last_return=-127.8 (+1 eps) [worker 1] episodes_seen=60 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=70 last_return=-110.0 (+1 eps) [worker 0] episodes_seen=70 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=80 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=80 last_return=-117.6 (+1 eps) [worker 0] episodes_seen=90 last_return=-116.6 (+1 eps) [worker 1] episodes_seen=90 last_return=-142.5 (+1 eps) [worker 0] episodes_seen=100 last_return=-141.8 (+1 eps) [worker 1] episodes_seen=100 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=110 last_return=-133.9 (+1 eps) [worker 1] episodes_seen=110 last_return=-107.1 (+1 eps) [worker 0] episodes_seen=120 last_return=-116.7 (+1 eps) [worker 1] episodes_seen=120 last_return=-151.7 (+1 eps) [worker 0] episodes_seen=130 last_return=-125.9 (+1 eps) [worker 1] episodes_seen=130 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=140 last_return=-107.8 (+1 eps) [worker 0] episodes_seen=150 last_return=-117.4 (+1 eps) [worker 1] episodes_seen=140 last_return=-123.7 (+1 eps) [worker 0] episodes_seen=160 last_return=-132.1 (+1 eps) [worker 1] episodes_seen=150 last_return=-111.4 (+1 eps) [worker 0] episodes_seen=170 last_return=-112.2 (+1 
eps) [worker 1] episodes_seen=160 last_return=-107.1 (+1 eps) [worker 0] episodes_seen=180 last_return=-109.3 (+1 eps) [worker 1] episodes_seen=170 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=190 last_return=-134.3 (+1 eps) [worker 1] episodes_seen=180 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=200 last_return=-127.3 (+1 eps) [worker 1] episodes_seen=190 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=210 last_return=-144.3 (+1 eps) [A2C][sync] it= 5001 steps= 50010 (+ 10) avg10=-116.56 loss=131.717 pg=-0.594 vf=220.534 H=0.693 gn=5290.416 [worker 1] episodes_seen=200 last_return=-115.4 (+1 eps) [worker 0] episodes_seen=220 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=210 last_return=-103.4 (+1 eps) [worker 0] episodes_seen=230 last_return=-116.9 (+1 eps) [worker 1] episodes_seen=220 last_return=-102.9 (+1 eps) [worker 0] episodes_seen=240 last_return=-100.5 (+1 eps) [worker 1] episodes_seen=230 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=250 last_return=-130.0 (+1 eps) [worker 1] episodes_seen=240 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=260 last_return=-103.1 (+1 eps) [worker 1] episodes_seen=250 last_return=-110.6 (+1 eps) [worker 0] episodes_seen=270 last_return=-139.1 (+1 eps) [worker 1] episodes_seen=260 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=280 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=270 last_return=-95.3 (+1 eps) [worker 0] episodes_seen=290 last_return=-131.3 (+1 eps) [worker 1] episodes_seen=280 last_return=-16.5 (+1 eps) [worker 0] episodes_seen=300 last_return=-98.5 (+1 eps) [worker 1] episodes_seen=290 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=300 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=310 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=310 last_return=-101.3 (+1 eps) [worker 0] episodes_seen=320 last_return=-102.9 (+1 eps) [worker 1] episodes_seen=320 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=330 last_return=-107.9 (+1 eps) [worker 
1] episodes_seen=330 last_return=-106.1 (+1 eps) [worker 0] episodes_seen=340 last_return=-110.0 (+1 eps) [worker 0] episodes_seen=350 last_return=-128.5 (+1 eps) [worker 1] episodes_seen=340 last_return=-138.4 (+1 eps) [worker 0] episodes_seen=360 last_return=-117.0 (+1 eps) [worker 1] episodes_seen=350 last_return=-111.3 (+1 eps) [worker 0] episodes_seen=370 last_return=-100.7 (+1 eps) [worker 1] episodes_seen=360 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=380 last_return=-98.9 (+1 eps) [worker 1] episodes_seen=370 last_return=-108.9 (+1 eps) [worker 0] episodes_seen=390 last_return=-108.5 (+1 eps) [worker 1] episodes_seen=380 last_return=-85.7 (+1 eps) [A2C][sync] it=10001 steps= 100010 (+ 10) avg10=-113.69 loss=31301.125 pg=-0.141 vf=52168.781 H=0.292 gn=510750.125 [worker 0] episodes_seen=400 last_return=-101.3 (+1 eps) [worker 1] episodes_seen=390 last_return=-95.7 (+1 eps) [worker 0] episodes_seen=410 last_return=-136.3 (+1 eps) [worker 1] episodes_seen=400 last_return=-150.8 (+1 eps) [worker 0] episodes_seen=420 last_return=-140.1 (+1 eps) [worker 1] episodes_seen=410 last_return=-113.0 (+1 eps) [worker 1] episodes_seen=420 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=430 last_return=-110.5 (+1 eps) [worker 1] episodes_seen=430 last_return=-111.3 (+1 eps) [worker 0] episodes_seen=440 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=440 last_return=-117.5 (+1 eps) [worker 0] episodes_seen=450 last_return=-109.7 (+1 eps) [worker 1] episodes_seen=450 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=460 last_return=-111.1 (+1 eps) [worker 1] episodes_seen=460 last_return=-116.1 (+1 eps) [worker 0] episodes_seen=470 last_return=-133.2 (+1 eps) [worker 1] episodes_seen=470 last_return=-134.5 (+1 eps) [worker 0] episodes_seen=480 last_return=-124.9 (+1 eps) [worker 1] episodes_seen=480 last_return=-126.4 (+1 eps) [worker 0] episodes_seen=490 last_return=-123.8 (+1 eps) [worker 1] episodes_seen=490 last_return=-132.0 (+1 eps) [worker 0] 
episodes_seen=500 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=500 last_return=-111.7 (+1 eps) [worker 0] episodes_seen=510 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=510 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=520 last_return=-111.1 (+1 eps) [worker 1] episodes_seen=520 last_return=-105.5 (+1 eps) [worker 0] episodes_seen=530 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=530 last_return=-138.1 (+1 eps) [worker 0] episodes_seen=540 last_return=-74.6 (+1 eps) [worker 1] episodes_seen=540 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=550 last_return=-131.2 (+1 eps) [A2C][sync] it=15001 steps= 150010 (+ 10) avg10=-115.46 loss=391394.125 pg=0.355 vf=652322.938 H=0.512 gn=7574811.000 [worker 1] episodes_seen=550 last_return=-141.7 (+1 eps) [worker 0] episodes_seen=560 last_return=-95.7 (+1 eps) [worker 0] episodes_seen=570 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=560 last_return=-105.7 (+1 eps) [worker 1] episodes_seen=570 last_return=-111.0 (+1 eps) [worker 0] episodes_seen=580 last_return=-135.6 (+1 eps) [worker 1] episodes_seen=580 last_return=-119.4 (+1 eps) [worker 0] episodes_seen=590 last_return=-103.5 (+1 eps) [worker 1] episodes_seen=590 last_return=-113.3 (+1 eps) [worker 0] episodes_seen=600 last_return=-108.4 (+1 eps) [worker 1] episodes_seen=600 last_return=-100.2 (+1 eps) [worker 0] episodes_seen=610 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=610 last_return=-97.0 (+1 eps) [worker 0] episodes_seen=620 last_return=-153.2 (+1 eps) [worker 1] episodes_seen=620 last_return=-99.8 (+1 eps) [worker 0] episodes_seen=630 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=640 last_return=-136.9 (+1 eps) [worker 1] episodes_seen=630 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=650 last_return=-136.0 (+1 eps) [worker 1] episodes_seen=640 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=660 last_return=-121.6 (+1 eps) [worker 1] episodes_seen=650 last_return=-129.0 (+1 eps) [worker 0] 
episodes_seen=670 last_return=-111.7 (+1 eps) [worker 1] episodes_seen=660 last_return=-116.3 (+1 eps) [worker 0] episodes_seen=680 last_return=-111.9 (+1 eps) [worker 1] episodes_seen=670 last_return=-134.1 (+1 eps) [worker 0] episodes_seen=690 last_return=-122.1 (+1 eps) [worker 1] episodes_seen=680 last_return=-132.1 (+1 eps) [worker 0] episodes_seen=700 last_return=-128.6 (+1 eps) [worker 1] episodes_seen=690 last_return=-109.9 (+1 eps) [worker 0] episodes_seen=710 last_return=-111.1 (+1 eps) [A2C][sync] it=20001 steps= 200010 (+ 10) avg10=-119.92 loss=6015860.500 pg=-0.000 vf=10026434.000 H=0.000 gn=48382812.000 [worker 1] episodes_seen=700 last_return=-109.2 (+1 eps) [worker 0] episodes_seen=720 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=710 last_return=-118.5 (+1 eps) [worker 0] episodes_seen=730 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=720 last_return=-102.1 (+1 eps) [worker 0] episodes_seen=740 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=730 last_return=-144.7 (+1 eps) [worker 1] episodes_seen=740 last_return=-114.9 (+1 eps) [worker 0] episodes_seen=750 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=760 last_return=-135.5 (+1 eps) [worker 1] episodes_seen=750 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=770 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=760 last_return=-125.9 (+1 eps) [worker 0] episodes_seen=780 last_return=-139.6 (+1 eps) [worker 1] episodes_seen=770 last_return=-139.5 (+1 eps) [worker 0] episodes_seen=790 last_return=-104.4 (+1 eps) [worker 1] episodes_seen=780 last_return=-110.0 (+1 eps) [worker 0] episodes_seen=800 last_return=-117.3 (+1 eps) [worker 1] episodes_seen=790 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=810 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=800 last_return=-128.4 (+1 eps) [worker 0] episodes_seen=820 last_return=-124.3 (+1 eps) [worker 1] episodes_seen=810 last_return=-64.4 (+1 eps) [worker 0] episodes_seen=830 last_return=-121.7 (+1 eps) [worker 
1] episodes_seen=820 last_return=-116.1 (+1 eps) [worker 0] episodes_seen=840 last_return=-126.0 (+1 eps) [worker 1] episodes_seen=830 last_return=-118.0 (+1 eps) [worker 0] episodes_seen=850 last_return=-135.2 (+1 eps) [worker 1] episodes_seen=840 last_return=-124.9 (+1 eps) [worker 0] episodes_seen=860 last_return=-120.1 (+1 eps) [worker 1] episodes_seen=850 last_return=-104.5 (+1 eps) [A2C][sync] it=25001 steps= 250010 (+ 10) avg10=-117.16 loss=12621836.000 pg=-0.285 vf=21036392.000 H=0.511 gn=122642624.000 [worker 0] episodes_seen=870 last_return=-96.3 (+1 eps) [worker 1] episodes_seen=860 last_return=-129.2 (+1 eps) [worker 0] episodes_seen=880 last_return=-131.0 (+1 eps) [worker 1] episodes_seen=870 last_return=-99.9 (+1 eps) [worker 0] episodes_seen=890 last_return=-112.3 (+1 eps) [worker 0] episodes_seen=900 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=880 last_return=-129.2 (+1 eps) [worker 1] episodes_seen=890 last_return=-140.4 (+1 eps) [worker 0] episodes_seen=910 last_return=-103.2 (+1 eps) [worker 1] episodes_seen=900 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=920 last_return=-97.4 (+1 eps) [worker 0] episodes_seen=930 last_return=-116.8 (+1 eps) [worker 1] episodes_seen=910 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=940 last_return=-121.1 (+1 eps) [worker 1] episodes_seen=920 last_return=-111.7 (+1 eps) [worker 0] episodes_seen=950 last_return=-106.1 (+1 eps) [worker 0] episodes_seen=960 last_return=-103.3 (+1 eps) [worker 1] episodes_seen=930 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=940 last_return=-126.4 (+1 eps) [worker 0] episodes_seen=970 last_return=-133.7 (+1 eps) [worker 1] episodes_seen=950 last_return=-113.5 (+1 eps) [worker 0] episodes_seen=980 last_return=-138.1 (+1 eps) [worker 1] episodes_seen=960 last_return=-130.0 (+1 eps) [worker 0] episodes_seen=990 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=970 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1000 last_return=-89.0 (+1 eps) 
[worker 1] episodes_seen=980 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1010 last_return=-109.7 (+1 eps) [worker 1] episodes_seen=990 last_return=-118.1 (+1 eps) [worker 0] episodes_seen=1020 last_return=-116.4 (+1 eps) [worker 1] episodes_seen=1000 last_return=-103.7 (+1 eps) [A2C][sync] it=30001 steps= 300010 (+ 10) avg10=-111.70 loss=656158.438 pg=0.356 vf=1093596.750 H=0.437 gn=7073249.500 [worker 0] episodes_seen=1030 last_return=-103.0 (+1 eps) [worker 0] episodes_seen=1040 last_return=-110.0 (+1 eps) [worker 1] episodes_seen=1010 last_return=-112.4 (+1 eps) [worker 1] episodes_seen=1020 last_return=-102.9 (+1 eps) [worker 0] episodes_seen=1050 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1030 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1060 last_return=-111.0 (+1 eps) [worker 1] episodes_seen=1040 last_return=-130.9 (+1 eps) [worker 0] episodes_seen=1070 last_return=-119.9 (+1 eps) [worker 1] episodes_seen=1050 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1080 last_return=-101.8 (+1 eps) [worker 1] episodes_seen=1060 last_return=-139.8 (+1 eps) [worker 0] episodes_seen=1090 last_return=-128.2 (+1 eps) [worker 1] episodes_seen=1070 last_return=-130.7 (+1 eps) [worker 0] episodes_seen=1100 last_return=-136.9 (+1 eps) [worker 1] episodes_seen=1080 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1110 last_return=-106.6 (+1 eps) [worker 1] episodes_seen=1090 last_return=-136.2 (+1 eps) [worker 0] episodes_seen=1120 last_return=-154.0 (+1 eps) [worker 1] episodes_seen=1100 last_return=-110.7 (+1 eps) [worker 0] episodes_seen=1130 last_return=-116.7 (+1 eps) [worker 1] episodes_seen=1110 last_return=-108.7 (+1 eps) [worker 0] episodes_seen=1140 last_return=-121.2 (+1 eps) [worker 1] episodes_seen=1120 last_return=-135.0 (+1 eps) [worker 1] episodes_seen=1130 last_return=-117.3 (+1 eps) [worker 0] episodes_seen=1150 last_return=-98.8 (+1 eps) [worker 1] episodes_seen=1140 last_return=-100.0 (+1 eps) [worker 0] 
episodes_seen=1160 last_return=-119.2 (+1 eps) [worker 0] episodes_seen=1170 last_return=-136.0 (+1 eps) [worker 1] episodes_seen=1150 last_return=-109.8 (+1 eps) [A2C][sync] it=35001 steps= 350010 (+ 10) avg10= -91.05 loss=49.099 pg=0.134 vf=81.637 H=1.145 gn=483.015 [worker 1] episodes_seen=1160 last_return=-105.5 (+1 eps) [worker 0] episodes_seen=1180 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1190 last_return=-109.5 (+1 eps) [worker 1] episodes_seen=1170 last_return=-98.0 (+1 eps) [worker 0] episodes_seen=1200 last_return=-132.4 (+1 eps) [worker 1] episodes_seen=1180 last_return=-102.2 (+1 eps) [worker 0] episodes_seen=1210 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1190 last_return=-133.7 (+1 eps) [worker 0] episodes_seen=1220 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1200 last_return=-101.3 (+1 eps) [worker 0] episodes_seen=1230 last_return=-116.0 (+1 eps) [worker 1] episodes_seen=1210 last_return=-137.7 (+1 eps) [worker 0] episodes_seen=1240 last_return=-89.9 (+1 eps) [worker 1] episodes_seen=1220 last_return=-107.8 (+1 eps) [worker 0] episodes_seen=1250 last_return=-119.8 (+1 eps) [worker 1] episodes_seen=1230 last_return=-137.6 (+1 eps) [worker 0] episodes_seen=1260 last_return=-156.1 (+1 eps) [worker 1] episodes_seen=1240 last_return=-125.6 (+1 eps) [worker 0] episodes_seen=1270 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1250 last_return=-92.7 (+1 eps) [worker 0] episodes_seen=1280 last_return=-80.3 (+1 eps) [worker 1] episodes_seen=1260 last_return=-109.3 (+1 eps) [worker 1] episodes_seen=1270 last_return=-109.5 (+1 eps) [worker 0] episodes_seen=1290 last_return=-102.9 (+1 eps) [worker 0] episodes_seen=1300 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1280 last_return=-99.1 (+1 eps) [worker 0] episodes_seen=1310 last_return=-121.7 (+1 eps) [worker 1] episodes_seen=1290 last_return=-85.1 (+1 eps) [worker 0] episodes_seen=1320 last_return=-99.2 (+1 eps) [worker 1] episodes_seen=1300 last_return=-118.5 (+1 
eps) [A2C][sync] it=40001 steps= 400010 (+ 10) avg10=-115.57 loss=72631440.000 pg=-0.509 vf=121052400.000 H=0.670 gn=134611360.000 [worker 0] episodes_seen=1330 last_return=-109.9 (+1 eps) [worker 1] episodes_seen=1310 last_return=-101.4 (+1 eps) [worker 0] episodes_seen=1340 last_return=-105.8 (+1 eps) [worker 1] episodes_seen=1320 last_return=-118.0 (+1 eps) [worker 0] episodes_seen=1350 last_return=-100.6 (+1 eps) [worker 1] episodes_seen=1330 last_return=-99.3 (+1 eps) [worker 0] episodes_seen=1360 last_return=-111.3 (+1 eps) [worker 1] episodes_seen=1340 last_return=-82.1 (+1 eps) [worker 0] episodes_seen=1370 last_return=-127.6 (+1 eps) [worker 1] episodes_seen=1350 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1380 last_return=-88.8 (+1 eps) [worker 1] episodes_seen=1360 last_return=-110.3 (+1 eps) [worker 0] episodes_seen=1390 last_return=-99.3 (+1 eps) [worker 1] episodes_seen=1370 last_return=-93.9 (+1 eps) [worker 0] episodes_seen=1400 last_return=-93.4 (+1 eps) [worker 1] episodes_seen=1380 last_return=-92.6 (+1 eps) [worker 0] episodes_seen=1410 last_return=-86.1 (+1 eps) [worker 1] episodes_seen=1390 last_return=-71.5 (+1 eps) [worker 0] episodes_seen=1420 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1400 last_return=-97.0 (+1 eps) [worker 0] episodes_seen=1430 last_return=-110.3 (+1 eps) [worker 0] episodes_seen=1440 last_return=-93.1 (+1 eps) [worker 1] episodes_seen=1410 last_return=-102.1 (+1 eps) [worker 1] episodes_seen=1420 last_return=-89.3 (+1 eps) [worker 0] episodes_seen=1450 last_return=-81.7 (+1 eps) [worker 1] episodes_seen=1430 last_return=-96.9 (+1 eps) [worker 0] episodes_seen=1460 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1440 last_return=-99.6 (+1 eps) [A2C][sync] it=45001 steps= 450010 (+ 10) avg10= -95.84 loss=15335131.000 pg=-0.083 vf=25558550.000 H=0.263 gn=9180225.000 [worker 0] episodes_seen=1470 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1450 last_return=-100.0 (+1 eps) [worker 0] 
episodes_seen=1480 last_return=-113.2 (+1 eps) [worker 1] episodes_seen=1460 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1470 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1490 last_return=-114.5 (+1 eps) [worker 1] episodes_seen=1480 last_return=-94.1 (+1 eps) [worker 0] episodes_seen=1500 last_return=-75.9 (+1 eps) [worker 1] episodes_seen=1490 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1510 last_return=-78.1 (+1 eps) [worker 1] episodes_seen=1500 last_return=-86.2 (+1 eps) [worker 0] episodes_seen=1520 last_return=-83.1 (+1 eps) [worker 1] episodes_seen=1510 last_return=-80.9 (+1 eps) [worker 0] episodes_seen=1530 last_return=-87.3 (+1 eps) [worker 1] episodes_seen=1520 last_return=-102.8 (+1 eps) [worker 0] episodes_seen=1540 last_return=-72.0 (+1 eps) [worker 1] episodes_seen=1530 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1550 last_return=-102.4 (+1 eps) [worker 1] episodes_seen=1540 last_return=-90.3 (+1 eps) [worker 0] episodes_seen=1560 last_return=-99.3 (+1 eps) [worker 1] episodes_seen=1550 last_return=-82.9 (+1 eps) [worker 0] episodes_seen=1570 last_return=-70.9 (+1 eps) [worker 1] episodes_seen=1560 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1580 last_return=-77.8 (+1 eps) [worker 1] episodes_seen=1570 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1590 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1580 last_return=-79.5 (+1 eps) [worker 0] episodes_seen=1600 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1590 last_return=-85.6 (+1 eps) [worker 0] episodes_seen=1610 last_return=-95.3 (+1 eps) [worker 1] episodes_seen=1600 last_return=-96.6 (+1 eps) [worker 0] episodes_seen=1620 last_return=-77.4 (+1 eps) [worker 1] episodes_seen=1610 last_return=-80.0 (+1 eps) [worker 0] episodes_seen=1630 last_return=-88.8 (+1 eps) [worker 1] episodes_seen=1620 last_return=-89.4 (+1 eps) [worker 0] episodes_seen=1640 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1630 last_return=-74.3 (+1 eps) 
[worker 0] episodes_seen=1650 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1640 last_return=-92.7 (+1 eps) [worker 0] episodes_seen=1660 last_return=-86.7 (+1 eps) [worker 1] episodes_seen=1650 last_return=-91.1 (+1 eps) [worker 0] episodes_seen=1670 last_return=-77.3 (+1 eps) [worker 1] episodes_seen=1660 last_return=-96.3 (+1 eps) [worker 0] episodes_seen=1680 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1670 last_return=-89.9 (+1 eps) [worker 0] episodes_seen=1690 last_return=-73.2 (+1 eps) [worker 1] episodes_seen=1680 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1700 last_return=-93.3 (+1 eps) [worker 1] episodes_seen=1690 last_return=-94.0 (+1 eps) [worker 0] episodes_seen=1710 last_return=-74.8 (+1 eps) [worker 1] episodes_seen=1700 last_return=-92.0 (+1 eps) [worker 0] episodes_seen=1720 last_return=-91.7 (+1 eps) [worker 1] episodes_seen=1710 last_return=-79.0 (+1 eps) [worker 0] episodes_seen=1730 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1720 last_return=-91.6 (+1 eps) [worker 0] episodes_seen=1740 last_return=-92.4 (+1 eps) [worker 1] episodes_seen=1730 last_return=-73.1 (+1 eps) [worker 0] episodes_seen=1750 last_return=-94.7 (+1 eps) [worker 1] episodes_seen=1740 last_return=-87.6 (+1 eps) [worker 0] episodes_seen=1760 last_return=-109.0 (+1 eps) [worker 1] episodes_seen=1750 last_return=-100.0 (+1 eps) [A2C][sync] it=50001 steps= 500010 (+ 10) avg10= -87.15 loss=241121.672 pg=-0.637 vf=401870.500 H=0.367 gn=51821.570 [worker 0] episodes_seen=1770 last_return=-39.6 (+1 eps) [worker 1] episodes_seen=1760 last_return=-69.9 (+1 eps) [worker 0] episodes_seen=1780 last_return=-51.6 (+1 eps) [worker 1] episodes_seen=1770 last_return=-77.1 (+1 eps) [worker 0] episodes_seen=1790 last_return=-81.6 (+1 eps) [worker 1] episodes_seen=1780 last_return=-71.2 (+1 eps) [worker 1] episodes_seen=1790 last_return=-75.6 (+1 eps) [worker 0] episodes_seen=1800 last_return=-145.8 (+1 eps) [worker 1] episodes_seen=1800 last_return=-100.0 
(+1 eps) [worker 1] episodes_seen=1810 last_return=-91.4 (+1 eps) [worker 0] episodes_seen=1810 last_return=-94.1 (+1 eps) [worker 1] episodes_seen=1820 last_return=-93.8 (+1 eps) [worker 0] episodes_seen=1820 last_return=-71.8 (+1 eps) [worker 1] episodes_seen=1830 last_return=-67.8 (+1 eps) [worker 0] episodes_seen=1830 last_return=-112.6 (+1 eps) [worker 0] episodes_seen=1840 last_return=-156.3 (+1 eps) [worker 1] episodes_seen=1840 last_return=-77.5 (+1 eps) [worker 0] episodes_seen=1850 last_return=-85.3 (+1 eps) [worker 1] episodes_seen=1850 last_return=-75.3 (+1 eps) [worker 0] episodes_seen=1860 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1860 last_return=-106.2 (+1 eps) [worker 0] episodes_seen=1870 last_return=-70.8 (+1 eps) [worker 1] episodes_seen=1870 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1880 last_return=-40.2 (+1 eps) [worker 1] episodes_seen=1880 last_return=-72.7 (+1 eps) [worker 0] episodes_seen=1890 last_return=-106.8 (+1 eps) [worker 1] episodes_seen=1890 last_return=-60.5 (+1 eps) [worker 0] episodes_seen=1900 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1900 last_return=-101.3 (+1 eps) [worker 0] episodes_seen=1910 last_return=-59.2 (+1 eps) [worker 1] episodes_seen=1910 last_return=-99.2 (+1 eps) [worker 0] episodes_seen=1920 last_return=-118.5 (+1 eps) [worker 1] episodes_seen=1920 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1930 last_return=-85.7 (+1 eps) [worker 1] episodes_seen=1930 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1940 last_return=-86.3 (+1 eps) [worker 1] episodes_seen=1940 last_return=-91.9 (+1 eps) [worker 0] episodes_seen=1950 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1950 last_return=-101.3 (+1 eps) [worker 0] episodes_seen=1960 last_return=-107.9 (+1 eps) [worker 1] episodes_seen=1960 last_return=-64.2 (+1 eps) [worker 0] episodes_seen=1970 last_return=-101.8 (+1 eps) [worker 1] episodes_seen=1970 last_return=-130.8 (+1 eps) [worker 0] episodes_seen=1980 
last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1980 last_return=-48.4 (+1 eps) [worker 0] episodes_seen=1990 last_return=-91.0 (+1 eps) [worker 1] episodes_seen=1990 last_return=-102.8 (+1 eps) [worker 0] episodes_seen=2000 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2000 last_return=-74.4 (+1 eps) [worker 0] episodes_seen=2010 last_return=-75.8 (+1 eps) [worker 1] episodes_seen=2010 last_return=-113.1 (+1 eps) [worker 0] episodes_seen=2020 last_return=-96.1 (+1 eps) [worker 1] episodes_seen=2020 last_return=-83.1 (+1 eps) [worker 0] episodes_seen=2030 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2030 last_return=-110.4 (+1 eps) [A2C][sync] it=55001 steps= 550010 (+ 10) avg10=-127.34 loss=163.985 pg=0.000 vf=273.308 H=0.005 gn=2855.886 [worker 0] episodes_seen=2040 last_return=-134.0 (+1 eps) [worker 1] episodes_seen=2040 last_return=-139.8 (+1 eps) [worker 0] episodes_seen=2050 last_return=-120.6 (+1 eps) [worker 1] episodes_seen=2050 last_return=-124.1 (+1 eps) [worker 0] episodes_seen=2060 last_return=-110.3 (+1 eps) [worker 1] episodes_seen=2060 last_return=-125.0 (+1 eps) [worker 0] episodes_seen=2070 last_return=-127.2 (+1 eps) [worker 1] episodes_seen=2070 last_return=-143.6 (+1 eps) [worker 0] episodes_seen=2080 last_return=-148.7 (+1 eps) [worker 0] episodes_seen=2090 last_return=-132.7 (+1 eps) [worker 1] episodes_seen=2080 last_return=-105.9 (+1 eps) [worker 0] episodes_seen=2100 last_return=-117.0 (+1 eps) [worker 1] episodes_seen=2090 last_return=-109.9 (+1 eps) [worker 0] episodes_seen=2110 last_return=-110.9 (+1 eps) [worker 0] episodes_seen=2120 last_return=-123.7 (+1 eps) [worker 1] episodes_seen=2100 last_return=-109.9 (+1 eps) [worker 0] episodes_seen=2130 last_return=-119.7 (+1 eps) [worker 1] episodes_seen=2110 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2140 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2120 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2150 last_return=-113.9 (+1 eps) [worker 1] 
episodes_seen=2130 last_return=-137.9 (+1 eps) [worker 0] episodes_seen=2160 last_return=-120.0 (+1 eps) [worker 1] episodes_seen=2140 last_return=-142.9 (+1 eps) [worker 0] episodes_seen=2170 last_return=-124.5 (+1 eps) [worker 1] episodes_seen=2150 last_return=-108.2 (+1 eps) [worker 0] episodes_seen=2180 last_return=-130.7 (+1 eps) [worker 1] episodes_seen=2160 last_return=-132.2 (+1 eps) [worker 0] episodes_seen=2190 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2170 last_return=-125.3 (+1 eps) [worker 0] episodes_seen=2200 last_return=-125.5 (+1 eps) [worker 1] episodes_seen=2180 last_return=-120.8 (+1 eps) [worker 0] episodes_seen=2210 last_return=-121.8 (+1 eps) [worker 1] episodes_seen=2190 last_return=-111.4 (+1 eps) [worker 0] episodes_seen=2220 last_return=-135.4 (+1 eps) [worker 1] episodes_seen=2200 last_return=-132.1 (+1 eps) [worker 0] episodes_seen=2230 last_return=-145.3 (+1 eps) [worker 0] episodes_seen=2240 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2210 last_return=-134.1 (+1 eps) [worker 0] episodes_seen=2250 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2220 last_return=-110.8 (+1 eps) [A2C][sync] it=60001 steps= 600010 (+ 10) avg10=-125.60 loss=3745.012 pg=-0.000 vf=6241.686 H=0.000 gn=99725.578 [worker 0] episodes_seen=2260 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2230 last_return=-107.1 (+1 eps) [worker 0] episodes_seen=2270 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2240 last_return=-167.2 (+1 eps) [worker 0] episodes_seen=2280 last_return=-127.9 (+1 eps) [worker 1] episodes_seen=2250 last_return=-120.0 (+1 eps) [worker 0] episodes_seen=2290 last_return=-122.0 (+1 eps) [worker 1] episodes_seen=2260 last_return=-124.4 (+1 eps) [worker 0] episodes_seen=2300 last_return=-109.8 (+1 eps) [worker 1] episodes_seen=2270 last_return=-145.5 (+1 eps) [worker 0] episodes_seen=2310 last_return=-117.3 (+1 eps) [worker 1] episodes_seen=2280 last_return=-126.7 (+1 eps) [worker 0] episodes_seen=2320 
last_return=-130.8 (+1 eps) [worker 1] episodes_seen=2290 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2330 last_return=-126.5 (+1 eps) [worker 1] episodes_seen=2300 last_return=-133.3 (+1 eps) [worker 0] episodes_seen=2340 last_return=-131.0 (+1 eps) [worker 1] episodes_seen=2310 last_return=-129.7 (+1 eps) [worker 0] episodes_seen=2350 last_return=-113.0 (+1 eps) [worker 1] episodes_seen=2320 last_return=-145.0 (+1 eps) [worker 0] episodes_seen=2360 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2330 last_return=-106.2 (+1 eps) [worker 0] episodes_seen=2370 last_return=-144.8 (+1 eps) [worker 1] episodes_seen=2340 last_return=-118.6 (+1 eps) [worker 0] episodes_seen=2380 last_return=-106.8 (+1 eps) [worker 1] episodes_seen=2350 last_return=-128.8 (+1 eps) [worker 0] episodes_seen=2390 last_return=-121.0 (+1 eps) [worker 1] episodes_seen=2360 last_return=-115.3 (+1 eps) [worker 0] episodes_seen=2400 last_return=-201.4 (+1 eps) [worker 1] episodes_seen=2370 last_return=-125.1 (+1 eps) [worker 0] episodes_seen=2410 last_return=-119.3 (+1 eps) [worker 1] episodes_seen=2380 last_return=-138.8 (+1 eps) [worker 1] episodes_seen=2390 last_return=-130.0 (+1 eps) [worker 0] episodes_seen=2420 last_return=-163.3 (+1 eps) [worker 1] episodes_seen=2400 last_return=-131.0 (+1 eps) [worker 0] episodes_seen=2430 last_return=-130.9 (+1 eps) [worker 0] episodes_seen=2440 last_return=-121.6 (+1 eps) [worker 1] episodes_seen=2410 last_return=-122.2 (+1 eps) [worker 0] episodes_seen=2450 last_return=-119.8 (+1 eps) [worker 1] episodes_seen=2420 last_return=-134.4 (+1 eps) [worker 0] episodes_seen=2460 last_return=-100.0 (+1 eps) [A2C][sync] it=65001 steps= 650010 (+ 10) avg10=-115.81 loss=526.047 pg=-0.000 vf=876.745 H=0.000 gn=14066.069 [worker 1] episodes_seen=2430 last_return=-129.2 (+1 eps) [worker 0] episodes_seen=2470 last_return=-126.3 (+1 eps) [worker 1] episodes_seen=2440 last_return=-156.0 (+1 eps) [worker 0] episodes_seen=2480 last_return=-137.2 (+1 eps) 
[worker 1] episodes_seen=2450 last_return=-120.6 (+1 eps) [worker 0] episodes_seen=2490 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2460 last_return=-111.2 (+1 eps) [worker 0] episodes_seen=2500 last_return=-118.7 (+1 eps) [worker 1] episodes_seen=2470 last_return=-112.4 (+1 eps) [worker 0] episodes_seen=2510 last_return=-119.1 (+1 eps) [worker 1] episodes_seen=2480 last_return=-115.5 (+1 eps) [worker 0] episodes_seen=2520 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2490 last_return=-142.4 (+1 eps) [worker 0] episodes_seen=2530 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2500 last_return=-132.1 (+1 eps) [worker 0] episodes_seen=2540 last_return=-111.5 (+1 eps) [worker 1] episodes_seen=2510 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2550 last_return=-129.0 (+1 eps) [worker 1] episodes_seen=2520 last_return=-127.7 (+1 eps) [worker 0] episodes_seen=2560 last_return=-127.8 (+1 eps) [worker 1] episodes_seen=2530 last_return=-125.3 (+1 eps) [worker 0] episodes_seen=2570 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2540 last_return=-125.9 (+1 eps) [worker 0] episodes_seen=2580 last_return=-133.8 (+1 eps) [worker 1] episodes_seen=2550 last_return=-123.1 (+1 eps) [worker 0] episodes_seen=2590 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2560 last_return=-106.9 (+1 eps) [worker 1] episodes_seen=2570 last_return=-114.3 (+1 eps) [worker 0] episodes_seen=2600 last_return=-110.9 (+1 eps) [worker 1] episodes_seen=2580 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2610 last_return=-111.1 (+1 eps) [worker 0] episodes_seen=2620 last_return=-120.7 (+1 eps) [worker 1] episodes_seen=2590 last_return=-114.8 (+1 eps) [worker 0] episodes_seen=2630 last_return=-118.2 (+1 eps) [worker 1] episodes_seen=2600 last_return=-136.0 (+1 eps) [worker 0] episodes_seen=2640 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2610 last_return=-114.4 (+1 eps) [worker 0] episodes_seen=2650 last_return=-112.8 (+1 eps) [worker 1] 
episodes_seen=2620 last_return=-106.9 (+1 eps) [A2C][sync] it=70000 steps= 700000 (+ 10) avg10=-121.82 loss=151.677 pg=-0.000 vf=252.794 H=0.000 gn=8272.357 [Checkpoint] Saved self-describing checkpoint → part2_artifacts/checkpoints/a2c_run14_seed1227.pth [A2C][sync] done: steps=700000 time=1507.9s avg10=-121.82
[Run run14_seed1227] checkpoint: part2_artifacts/checkpoints/a2c_run14_seed1227.pth [Run run14_seed1227] training plot (tail 500): part2_artifacts/train_curve_run14_seed1227.png [Run run14_seed1227] training plot (full): part2_artifacts/train_curve_full_run14_seed1227.png [Run run14_seed1227] per-worker plot: part2_artifacts/train_curve_workers_run14_seed1227.png [Run run14_seed1227] workers-average plot: part2_artifacts/train_curve_workers_avg_run14_seed1227.png
[Eval run14_seed1227] mean=-693.82 std=113.17 min=-927.28 max=-562.23 [Eval run14_seed1227] CSV: part2_artifacts/eval10_run14_seed1227.csv [Eval run14_seed1227] plot: part2_artifacts/eval10_run14_seed1227.png [Best] ep=6 return=-562.23 seed=1233
/usr/local/lib/python3.12/dist-packages/gymnasium/wrappers/rendering.py:293: UserWarning: WARN: Overwriting existing videos at /content/part2_artifacts/videos/run14_seed1227 folder (try specifying a different `video_folder` for the `RecordVideo` wrapper if this is not desired)
logger.warn(
[Video run14_seed1227] episode return=-562.23 [Video run14_seed1227] saved under: part2_artifacts/videos run14_seed1227 | mean=-693.8±113.2 | best_ep=6, best_ret=-562.2
Run#15
# --- Run 15: train, evaluate, and record a video for one configuration ---
run_id = f"run15_seed{SEED}"

# All hyperparameters for this run, collected in one place so the
# configuration can be read (and diffed against other runs) at a glance.
_run15_hparams = dict(
    n_workers=3,
    total_env_steps=1_000_000,
    T=20,
    gamma=0.99,
    entropy_coef=0.010,
    value_coef=0.60,
    max_grad_norm=0.5,
    lr=3e-4,
    log_every=50_000,
)

# train_once returns the trained model, its logs, and a paths object whose
# ckpt_path attribute is consumed by the two calls below.
model, logs, paths = train_once(run_id=run_id, **_run15_hparams)

# Evaluate the saved checkpoint; only the first returned item (the metrics
# mapping with 'mean' and 'std') is used here.
metrics, _ = evaluate_10(run_id, paths.ckpt_path)

# Record a video from the same checkpoint; yields the video directory plus
# the index and return of the best episode.
video_dir, best_idx, best_ret = record_best_from_eval(run_id, paths.ckpt_path)

# One-line summary of the run.
print(f"{run_id} | mean={metrics['mean']:.1f}±{metrics['std']:.1f} | best_ep={best_idx}, best_ret={best_ret:.1f}")
[Run run15_seed1227] starting training… [A2C][sync] start: workers=3, T=20, target_steps=1000000, mp=fork [A2C][sync] it= 1 steps= 60 (+ 60) avg10= nan loss=47.319 pg=-0.000 vf=78.888 H=1.386 gn=23.628 [worker 2] episodes_seen=10 last_return=-170.2 (+1 eps) [worker 0] episodes_seen=10 last_return=-214.0 (+1 eps) [worker 1] episodes_seen=10 last_return=-257.0 (+1 eps) [worker 2] episodes_seen=20 last_return=-86.5 (+1 eps) [worker 0] episodes_seen=20 last_return=-108.3 (+1 eps) [worker 1] episodes_seen=20 last_return=-175.7 (+1 eps) [worker 0] episodes_seen=30 last_return=-162.2 (+1 eps) [worker 1] episodes_seen=30 last_return=-268.0 (+1 eps) [worker 2] episodes_seen=30 last_return=-133.0 (+1 eps) [worker 0] episodes_seen=40 last_return=-120.3 (+1 eps) [worker 1] episodes_seen=40 last_return=-10.7 (+1 eps) [worker 2] episodes_seen=40 last_return=-246.7 (+1 eps) [worker 0] episodes_seen=50 last_return=-210.8 (+1 eps) [worker 2] episodes_seen=50 last_return=-128.2 (+1 eps) [worker 1] episodes_seen=50 last_return=-179.0 (+1 eps) [worker 0] episodes_seen=60 last_return=-361.5 (+1 eps) [worker 2] episodes_seen=60 last_return=-118.9 (+1 eps) [worker 1] episodes_seen=60 last_return=-238.3 (+1 eps) [worker 0] episodes_seen=70 last_return=-166.3 (+1 eps) [worker 2] episodes_seen=70 last_return=-104.3 (+1 eps) [worker 1] episodes_seen=70 last_return=-282.5 (+1 eps) [worker 0] episodes_seen=80 last_return=-317.8 (+1 eps) [worker 2] episodes_seen=80 last_return=-131.2 (+1 eps) [worker 1] episodes_seen=80 last_return=-51.8 (+1 eps) [worker 0] episodes_seen=90 last_return=-84.9 (+1 eps) [worker 2] episodes_seen=90 last_return=-352.8 (+1 eps) [worker 1] episodes_seen=90 last_return=-131.5 (+1 eps) [worker 0] episodes_seen=100 last_return=-195.4 (+1 eps) [worker 2] episodes_seen=100 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=100 last_return=-30.3 (+1 eps) [worker 0] episodes_seen=110 last_return=-132.5 (+1 eps) [worker 2] episodes_seen=110 last_return=-240.2 (+1 eps) 
[worker 1] episodes_seen=110 last_return=-109.9 (+1 eps) [worker 0] episodes_seen=120 last_return=-191.6 (+1 eps) [worker 2] episodes_seen=120 last_return=-159.8 (+1 eps) [worker 1] episodes_seen=120 last_return=-112.2 (+1 eps) [worker 0] episodes_seen=130 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=130 last_return=-366.5 (+1 eps) [worker 1] episodes_seen=130 last_return=-305.1 (+1 eps) [worker 0] episodes_seen=140 last_return=-374.3 (+1 eps) [worker 2] episodes_seen=140 last_return=-261.7 (+1 eps) [worker 1] episodes_seen=140 last_return=-277.9 (+1 eps) [worker 0] episodes_seen=150 last_return=-20.8 (+1 eps) [worker 2] episodes_seen=150 last_return=-298.1 (+1 eps) [worker 1] episodes_seen=150 last_return=-209.3 (+1 eps) [worker 0] episodes_seen=160 last_return=-163.1 (+1 eps) [worker 2] episodes_seen=160 last_return=-567.3 (+1 eps) [worker 1] episodes_seen=160 last_return=-117.2 (+1 eps) [worker 0] episodes_seen=170 last_return=-266.8 (+1 eps) [worker 2] episodes_seen=170 last_return=-263.1 (+1 eps) [worker 1] episodes_seen=170 last_return=-368.6 (+1 eps) [worker 0] episodes_seen=180 last_return=-171.6 (+1 eps) [worker 2] episodes_seen=180 last_return=-352.7 (+1 eps) [worker 1] episodes_seen=180 last_return=-245.6 (+1 eps) [worker 0] episodes_seen=190 last_return=-413.1 (+1 eps) [worker 2] episodes_seen=190 last_return=-281.8 (+1 eps) [worker 1] episodes_seen=190 last_return=-202.2 (+1 eps) [worker 0] episodes_seen=200 last_return=-271.5 (+1 eps) [worker 2] episodes_seen=200 last_return=-243.0 (+1 eps) [worker 1] episodes_seen=200 last_return=-285.3 (+1 eps) [worker 0] episodes_seen=210 last_return=-44.9 (+1 eps) [A2C][sync] it= 835 steps= 50100 (+ 60) avg10=-221.75 loss=15.433 pg=-0.000 vf=25.722 H=0.001 gn=503.225 [worker 2] episodes_seen=210 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=210 last_return=-176.0 (+1 eps) [worker 0] episodes_seen=220 last_return=-243.1 (+1 eps) [worker 2] episodes_seen=220 last_return=-100.0 (+1 eps) [worker 1] 
episodes_seen=220 last_return=-175.4 (+1 eps) [worker 0] episodes_seen=230 last_return=-289.5 (+1 eps) [worker 2] episodes_seen=230 last_return=-223.1 (+1 eps) [worker 1] episodes_seen=230 last_return=-259.8 (+1 eps) [worker 0] episodes_seen=240 last_return=-238.9 (+1 eps) [worker 2] episodes_seen=240 last_return=-129.6 (+1 eps) [worker 1] episodes_seen=240 last_return=-214.3 (+1 eps) [worker 0] episodes_seen=250 last_return=-319.1 (+1 eps) [worker 2] episodes_seen=250 last_return=-202.1 (+1 eps) [worker 1] episodes_seen=250 last_return=-210.2 (+1 eps) [worker 0] episodes_seen=260 last_return=-164.4 (+1 eps) [worker 2] episodes_seen=260 last_return=-329.8 (+1 eps) [worker 1] episodes_seen=260 last_return=-139.9 (+1 eps) [worker 0] episodes_seen=270 last_return=-123.9 (+1 eps) [worker 2] episodes_seen=270 last_return=-187.9 (+1 eps) [worker 1] episodes_seen=270 last_return=-188.1 (+1 eps) [worker 0] episodes_seen=280 last_return=-85.5 (+1 eps) [worker 2] episodes_seen=280 last_return=-264.9 (+1 eps) [worker 1] episodes_seen=280 last_return=-134.7 (+1 eps) [worker 0] episodes_seen=290 last_return=-122.2 (+1 eps) [worker 2] episodes_seen=290 last_return=-270.8 (+1 eps) [worker 1] episodes_seen=290 last_return=-143.3 (+1 eps) [worker 0] episodes_seen=300 last_return=-244.9 (+1 eps) [worker 2] episodes_seen=300 last_return=-220.3 (+1 eps) [worker 1] episodes_seen=300 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=310 last_return=-59.7 (+1 eps) [worker 2] episodes_seen=310 last_return=-329.0 (+1 eps) [worker 1] episodes_seen=310 last_return=-150.1 (+1 eps) [worker 0] episodes_seen=320 last_return=-127.9 (+1 eps) [worker 2] episodes_seen=320 last_return=-230.4 (+1 eps) [worker 1] episodes_seen=320 last_return=-114.2 (+1 eps) [worker 0] episodes_seen=330 last_return=-296.2 (+1 eps) [worker 2] episodes_seen=330 last_return=-230.4 (+1 eps) [worker 1] episodes_seen=330 last_return=-212.4 (+1 eps) [worker 0] episodes_seen=340 last_return=-342.7 (+1 eps) [worker 2] 
episodes_seen=340 last_return=-243.2 (+1 eps) [worker 1] episodes_seen=340 last_return=-177.6 (+1 eps) [worker 0] episodes_seen=350 last_return=-268.6 (+1 eps) [worker 2] episodes_seen=350 last_return=-339.7 (+1 eps) [worker 1] episodes_seen=350 last_return=-224.7 (+1 eps) [worker 0] episodes_seen=360 last_return=-272.2 (+1 eps) [worker 2] episodes_seen=360 last_return=-165.9 (+1 eps) [worker 1] episodes_seen=360 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=370 last_return=-353.6 (+1 eps) [worker 2] episodes_seen=370 last_return=-215.1 (+1 eps) [worker 1] episodes_seen=370 last_return=-325.4 (+1 eps) [worker 0] episodes_seen=380 last_return=-322.5 (+1 eps) [worker 2] episodes_seen=380 last_return=-172.3 (+1 eps) [worker 1] episodes_seen=380 last_return=-284.3 (+1 eps) [worker 0] episodes_seen=390 last_return=-186.7 (+1 eps) [worker 2] episodes_seen=390 last_return=-112.3 (+1 eps) [worker 1] episodes_seen=390 last_return=-36.5 (+1 eps) [worker 0] episodes_seen=400 last_return=-285.3 (+1 eps) [worker 2] episodes_seen=400 last_return=-192.5 (+1 eps) [worker 1] episodes_seen=400 last_return=-365.5 (+1 eps) [worker 0] episodes_seen=410 last_return=-87.1 (+1 eps) [worker 2] episodes_seen=410 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=410 last_return=-127.8 (+1 eps) [worker 0] episodes_seen=420 last_return=-190.7 (+1 eps) [worker 2] episodes_seen=420 last_return=-113.8 (+1 eps) [worker 1] episodes_seen=420 last_return=-250.8 (+1 eps) [worker 0] episodes_seen=430 last_return=-147.0 (+1 eps) [worker 2] episodes_seen=430 last_return=-293.2 (+1 eps) [worker 1] episodes_seen=430 last_return=-275.1 (+1 eps) [worker 0] episodes_seen=440 last_return=-237.5 (+1 eps) [worker 2] episodes_seen=440 last_return=-324.3 (+1 eps) [worker 1] episodes_seen=440 last_return=-37.3 (+1 eps) [worker 0] episodes_seen=450 last_return=-303.8 (+1 eps) [worker 2] episodes_seen=450 last_return=-143.2 (+1 eps) [worker 1] episodes_seen=450 last_return=-186.0 (+1 eps) [A2C][sync] it= 
1669 steps= 100140 (+ 60) avg10=-210.11 loss=471.198 pg=0.000 vf=785.330 H=0.000 gn=5668.293 [worker 0] episodes_seen=460 last_return=-305.6 (+1 eps) [worker 2] episodes_seen=460 last_return=-297.5 (+1 eps) [worker 1] episodes_seen=460 last_return=-288.0 (+1 eps) [worker 0] episodes_seen=470 last_return=-359.4 (+1 eps) [worker 2] episodes_seen=470 last_return=-228.8 (+1 eps) [worker 1] episodes_seen=470 last_return=-136.8 (+1 eps) [worker 0] episodes_seen=480 last_return=26.8 (+1 eps) [worker 2] episodes_seen=480 last_return=-206.5 (+1 eps) [worker 1] episodes_seen=480 last_return=-232.4 (+1 eps) [worker 0] episodes_seen=490 last_return=-196.4 (+1 eps) [worker 2] episodes_seen=490 last_return=-261.8 (+1 eps) [worker 1] episodes_seen=490 last_return=-181.7 (+1 eps) [worker 0] episodes_seen=500 last_return=-144.5 (+1 eps) [worker 2] episodes_seen=500 last_return=-307.6 (+1 eps) [worker 1] episodes_seen=500 last_return=-172.3 (+1 eps) [worker 0] episodes_seen=510 last_return=-275.8 (+1 eps) [worker 2] episodes_seen=510 last_return=-194.8 (+1 eps) [worker 1] episodes_seen=510 last_return=-268.6 (+1 eps) [worker 0] episodes_seen=520 last_return=-134.3 (+1 eps) [worker 2] episodes_seen=520 last_return=-120.7 (+1 eps) [worker 1] episodes_seen=520 last_return=28.8 (+1 eps) [worker 2] episodes_seen=530 last_return=-301.4 (+1 eps) [worker 0] episodes_seen=530 last_return=-115.8 (+1 eps) [worker 1] episodes_seen=530 last_return=-335.7 (+1 eps) [worker 0] episodes_seen=540 last_return=-203.2 (+1 eps) [worker 2] episodes_seen=540 last_return=-182.9 (+1 eps) [worker 1] episodes_seen=540 last_return=-120.7 (+1 eps) [worker 0] episodes_seen=550 last_return=-177.7 (+1 eps) [worker 2] episodes_seen=550 last_return=-221.2 (+1 eps) [worker 1] episodes_seen=550 last_return=-117.8 (+1 eps) [worker 0] episodes_seen=560 last_return=-112.7 (+1 eps) [worker 2] episodes_seen=560 last_return=-217.2 (+1 eps) [worker 1] episodes_seen=560 last_return=-123.5 (+1 eps) [worker 2] episodes_seen=570 
last_return=-158.3 (+1 eps) [worker 0] episodes_seen=570 last_return=-253.6 (+1 eps) [worker 1] episodes_seen=570 last_return=-152.9 (+1 eps) [worker 2] episodes_seen=580 last_return=-243.5 (+1 eps) [worker 0] episodes_seen=580 last_return=-174.3 (+1 eps) [worker 1] episodes_seen=580 last_return=-315.1 (+1 eps) [worker 0] episodes_seen=590 last_return=-245.4 (+1 eps) [worker 2] episodes_seen=590 last_return=-232.2 (+1 eps) [worker 1] episodes_seen=590 last_return=-261.4 (+1 eps) [worker 0] episodes_seen=600 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=600 last_return=-263.4 (+1 eps) [worker 1] episodes_seen=600 last_return=-144.8 (+1 eps) [worker 0] episodes_seen=610 last_return=-84.1 (+1 eps) [worker 2] episodes_seen=610 last_return=-308.4 (+1 eps) [worker 1] episodes_seen=610 last_return=-181.2 (+1 eps) [worker 0] episodes_seen=620 last_return=-71.9 (+1 eps) [worker 2] episodes_seen=620 last_return=-338.6 (+1 eps) [worker 1] episodes_seen=620 last_return=-312.7 (+1 eps) [worker 2] episodes_seen=630 last_return=-217.9 (+1 eps) [worker 0] episodes_seen=630 last_return=-172.9 (+1 eps) [worker 1] episodes_seen=630 last_return=-138.0 (+1 eps) [worker 2] episodes_seen=640 last_return=-198.2 (+1 eps) [worker 0] episodes_seen=640 last_return=-266.4 (+1 eps) [worker 1] episodes_seen=640 last_return=-323.2 (+1 eps) [worker 2] episodes_seen=650 last_return=-214.9 (+1 eps) [worker 0] episodes_seen=650 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=650 last_return=-383.9 (+1 eps) [worker 2] episodes_seen=660 last_return=71.0 (+1 eps) [worker 0] episodes_seen=660 last_return=-280.6 (+1 eps) [worker 1] episodes_seen=660 last_return=-238.7 (+1 eps) [worker 2] episodes_seen=670 last_return=-256.6 (+1 eps) [worker 0] episodes_seen=670 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=670 last_return=-198.9 (+1 eps) [worker 2] episodes_seen=680 last_return=-222.6 (+1 eps) [worker 0] episodes_seen=680 last_return=-124.6 (+1 eps) [worker 1] episodes_seen=680 
last_return=-269.9 (+1 eps) [worker 2] episodes_seen=690 last_return=-239.0 (+1 eps) [worker 0] episodes_seen=690 last_return=-219.7 (+1 eps) [worker 1] episodes_seen=690 last_return=-164.8 (+1 eps) [worker 2] episodes_seen=700 last_return=-166.3 (+1 eps) [worker 0] episodes_seen=700 last_return=-270.8 (+1 eps) [worker 1] episodes_seen=700 last_return=-263.3 (+1 eps) [A2C][sync] it= 2503 steps= 150180 (+ 60) avg10=-157.65 loss=167.974 pg=0.000 vf=279.957 H=0.000 gn=1521.082 [worker 2] episodes_seen=710 last_return=-269.9 (+1 eps) [worker 0] episodes_seen=710 last_return=-202.9 (+1 eps) [worker 1] episodes_seen=710 last_return=-309.0 (+1 eps) [worker 2] episodes_seen=720 last_return=-246.2 (+1 eps) [worker 0] episodes_seen=720 last_return=-281.7 (+1 eps) [worker 1] episodes_seen=720 last_return=-265.1 (+1 eps) [worker 2] episodes_seen=730 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=730 last_return=-216.6 (+1 eps) [worker 1] episodes_seen=730 last_return=-258.9 (+1 eps) [worker 2] episodes_seen=740 last_return=-235.0 (+1 eps) [worker 0] episodes_seen=740 last_return=-101.3 (+1 eps) [worker 1] episodes_seen=740 last_return=-98.2 (+1 eps) [worker 2] episodes_seen=750 last_return=-198.5 (+1 eps) [worker 0] episodes_seen=750 last_return=-124.3 (+1 eps) [worker 1] episodes_seen=750 last_return=-272.7 (+1 eps) [worker 2] episodes_seen=760 last_return=-150.1 (+1 eps) [worker 0] episodes_seen=760 last_return=-182.5 (+1 eps) [worker 1] episodes_seen=760 last_return=-198.5 (+1 eps) [worker 2] episodes_seen=770 last_return=-234.2 (+1 eps) [worker 1] episodes_seen=770 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=770 last_return=-185.2 (+1 eps) [worker 2] episodes_seen=780 last_return=-314.2 (+1 eps) [worker 1] episodes_seen=780 last_return=-13.6 (+1 eps) [worker 0] episodes_seen=780 last_return=-225.8 (+1 eps) [worker 2] episodes_seen=790 last_return=-353.7 (+1 eps) [worker 1] episodes_seen=790 last_return=-195.2 (+1 eps) [worker 0] episodes_seen=790 
last_return=-189.3 (+1 eps) [worker 2] episodes_seen=800 last_return=-175.6 (+1 eps) [worker 1] episodes_seen=800 last_return=-304.4 (+1 eps) [worker 0] episodes_seen=800 last_return=-216.6 (+1 eps) [worker 2] episodes_seen=810 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=810 last_return=-270.1 (+1 eps) [worker 0] episodes_seen=810 last_return=-333.0 (+1 eps) [worker 2] episodes_seen=820 last_return=-223.9 (+1 eps) [worker 1] episodes_seen=820 last_return=-121.1 (+1 eps) [worker 0] episodes_seen=820 last_return=-284.5 (+1 eps) [worker 2] episodes_seen=830 last_return=-74.0 (+1 eps) [worker 1] episodes_seen=830 last_return=-302.2 (+1 eps) [worker 0] episodes_seen=830 last_return=-225.5 (+1 eps) [worker 2] episodes_seen=840 last_return=-159.9 (+1 eps) [worker 1] episodes_seen=840 last_return=-248.3 (+1 eps) [worker 0] episodes_seen=840 last_return=-244.3 (+1 eps) [worker 2] episodes_seen=850 last_return=-357.3 (+1 eps) [worker 1] episodes_seen=850 last_return=-265.3 (+1 eps) [worker 0] episodes_seen=850 last_return=-148.0 (+1 eps) [worker 2] episodes_seen=860 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=860 last_return=-157.9 (+1 eps) [worker 0] episodes_seen=860 last_return=-185.9 (+1 eps) [worker 2] episodes_seen=870 last_return=-102.7 (+1 eps) [worker 1] episodes_seen=870 last_return=-260.2 (+1 eps) [worker 0] episodes_seen=870 last_return=-164.2 (+1 eps) [worker 2] episodes_seen=880 last_return=-171.2 (+1 eps) [worker 1] episodes_seen=880 last_return=-254.8 (+1 eps) [worker 0] episodes_seen=880 last_return=-135.7 (+1 eps) [worker 2] episodes_seen=890 last_return=-135.7 (+1 eps) [worker 1] episodes_seen=890 last_return=-299.5 (+1 eps) [worker 0] episodes_seen=890 last_return=-189.9 (+1 eps) [worker 2] episodes_seen=900 last_return=-273.3 (+1 eps) [worker 1] episodes_seen=900 last_return=-288.1 (+1 eps) [worker 0] episodes_seen=900 last_return=-250.1 (+1 eps) [worker 2] episodes_seen=910 last_return=-292.5 (+1 eps) [worker 1] episodes_seen=910 
last_return=-292.3 (+1 eps) [worker 0] episodes_seen=910 last_return=-299.5 (+1 eps) [worker 2] episodes_seen=920 last_return=-278.4 (+1 eps) [worker 1] episodes_seen=920 last_return=-45.2 (+1 eps) [worker 0] episodes_seen=920 last_return=-317.6 (+1 eps) [worker 2] episodes_seen=930 last_return=-179.3 (+1 eps) [worker 1] episodes_seen=930 last_return=-330.0 (+1 eps) [worker 0] episodes_seen=930 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=940 last_return=-215.8 (+1 eps) [worker 1] episodes_seen=940 last_return=-111.6 (+1 eps) [worker 0] episodes_seen=940 last_return=-121.7 (+1 eps) [worker 2] episodes_seen=950 last_return=-309.5 (+1 eps) [worker 1] episodes_seen=950 last_return=-246.0 (+1 eps) [A2C][sync] it= 3337 steps= 200220 (+ 60) avg10=-224.34 loss=76.014 pg=0.000 vf=126.690 H=0.000 gn=1286.950 [worker 0] episodes_seen=950 last_return=-123.0 (+1 eps) [worker 2] episodes_seen=960 last_return=-137.9 (+1 eps) [worker 1] episodes_seen=960 last_return=-171.8 (+1 eps) [worker 0] episodes_seen=960 last_return=-209.4 (+1 eps) [worker 2] episodes_seen=970 last_return=-235.9 (+1 eps) [worker 1] episodes_seen=970 last_return=-135.6 (+1 eps) [worker 0] episodes_seen=970 last_return=-190.6 (+1 eps) [worker 2] episodes_seen=980 last_return=-253.4 (+1 eps) [worker 1] episodes_seen=980 last_return=-289.9 (+1 eps) [worker 0] episodes_seen=980 last_return=-223.0 (+1 eps) [worker 2] episodes_seen=990 last_return=-193.9 (+1 eps) [worker 1] episodes_seen=990 last_return=-255.3 (+1 eps) [worker 0] episodes_seen=990 last_return=-127.4 (+1 eps) [worker 2] episodes_seen=1000 last_return=-112.9 (+1 eps) [worker 1] episodes_seen=1000 last_return=-265.7 (+1 eps) [worker 0] episodes_seen=1000 last_return=-162.6 (+1 eps) [worker 2] episodes_seen=1010 last_return=-149.9 (+1 eps) [worker 1] episodes_seen=1010 last_return=-169.7 (+1 eps) [worker 0] episodes_seen=1010 last_return=-172.6 (+1 eps) [worker 2] episodes_seen=1020 last_return=-269.1 (+1 eps) [worker 1] episodes_seen=1020 
last_return=-154.4 (+1 eps) [worker 0] episodes_seen=1020 last_return=-272.5 (+1 eps) [worker 2] episodes_seen=1030 last_return=-115.7 (+1 eps) [worker 1] episodes_seen=1030 last_return=-177.7 (+1 eps) [worker 0] episodes_seen=1030 last_return=-237.9 (+1 eps) [worker 2] episodes_seen=1040 last_return=-163.4 (+1 eps) [worker 1] episodes_seen=1040 last_return=-228.5 (+1 eps) [worker 0] episodes_seen=1040 last_return=-148.4 (+1 eps) [worker 2] episodes_seen=1050 last_return=-165.6 (+1 eps) [worker 0] episodes_seen=1050 last_return=-123.4 (+1 eps) [worker 1] episodes_seen=1050 last_return=-128.8 (+1 eps) [worker 2] episodes_seen=1060 last_return=-124.2 (+1 eps) [worker 0] episodes_seen=1060 last_return=-201.2 (+1 eps) [worker 1] episodes_seen=1060 last_return=-226.4 (+1 eps) [worker 1] episodes_seen=1070 last_return=-238.4 (+1 eps) [worker 2] episodes_seen=1070 last_return=-142.9 (+1 eps) [worker 0] episodes_seen=1070 last_return=-247.5 (+1 eps) [worker 1] episodes_seen=1080 last_return=-185.1 (+1 eps) [worker 2] episodes_seen=1080 last_return=-237.1 (+1 eps) [worker 0] episodes_seen=1080 last_return=-207.9 (+1 eps) [worker 1] episodes_seen=1090 last_return=-251.8 (+1 eps) [worker 2] episodes_seen=1090 last_return=-147.6 (+1 eps) [worker 0] episodes_seen=1090 last_return=-150.7 (+1 eps) [worker 1] episodes_seen=1100 last_return=-270.2 (+1 eps) [worker 2] episodes_seen=1100 last_return=-224.2 (+1 eps) [worker 0] episodes_seen=1100 last_return=-243.4 (+1 eps) [worker 1] episodes_seen=1110 last_return=-131.2 (+1 eps) [worker 2] episodes_seen=1110 last_return=-278.3 (+1 eps) [worker 0] episodes_seen=1110 last_return=-260.0 (+1 eps) [worker 1] episodes_seen=1120 last_return=-214.5 (+1 eps) [worker 2] episodes_seen=1120 last_return=-161.3 (+1 eps) [worker 0] episodes_seen=1120 last_return=-251.8 (+1 eps) [worker 1] episodes_seen=1130 last_return=-140.8 (+1 eps) [worker 2] episodes_seen=1130 last_return=-154.6 (+1 eps) [worker 0] episodes_seen=1130 last_return=-295.4 (+1 eps) 
[worker 1] episodes_seen=1140 last_return=-244.4 (+1 eps) [worker 2] episodes_seen=1140 last_return=-336.6 (+1 eps) [worker 0] episodes_seen=1140 last_return=-89.9 (+1 eps) [worker 1] episodes_seen=1150 last_return=-146.3 (+1 eps) [A2C][sync] it= 4171 steps= 250260 (+ 60) avg10=-180.42 loss=23.347 pg=-0.027 vf=38.956 H=0.015 gn=451.052 [worker 2] episodes_seen=1150 last_return=-326.6 (+1 eps) [worker 0] episodes_seen=1150 last_return=-265.2 (+1 eps) [worker 1] episodes_seen=1160 last_return=-297.3 (+1 eps) [worker 2] episodes_seen=1160 last_return=-313.0 (+1 eps) [worker 0] episodes_seen=1160 last_return=-228.5 (+1 eps) [worker 1] episodes_seen=1170 last_return=-138.9 (+1 eps) [worker 2] episodes_seen=1170 last_return=-128.4 (+1 eps) [worker 0] episodes_seen=1170 last_return=-248.9 (+1 eps) [worker 1] episodes_seen=1180 last_return=-110.2 (+1 eps) [worker 0] episodes_seen=1180 last_return=-265.7 (+1 eps) [worker 2] episodes_seen=1180 last_return=-258.5 (+1 eps) [worker 1] episodes_seen=1190 last_return=-219.2 (+1 eps) [worker 0] episodes_seen=1190 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1190 last_return=-229.9 (+1 eps) [worker 1] episodes_seen=1200 last_return=-138.3 (+1 eps) [worker 0] episodes_seen=1200 last_return=-186.3 (+1 eps) [worker 2] episodes_seen=1200 last_return=-211.5 (+1 eps) [worker 1] episodes_seen=1210 last_return=-221.7 (+1 eps) [worker 0] episodes_seen=1210 last_return=-248.5 (+1 eps) [worker 2] episodes_seen=1210 last_return=-197.7 (+1 eps) [worker 1] episodes_seen=1220 last_return=-196.9 (+1 eps) [worker 0] episodes_seen=1220 last_return=-150.5 (+1 eps) [worker 2] episodes_seen=1220 last_return=-159.4 (+1 eps) [worker 1] episodes_seen=1230 last_return=-234.2 (+1 eps) [worker 0] episodes_seen=1230 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1240 last_return=-118.9 (+1 eps) [worker 2] episodes_seen=1230 last_return=-109.7 (+1 eps) [worker 0] episodes_seen=1240 last_return=-217.4 (+1 eps) [worker 1] episodes_seen=1250 
last_return=-108.3 (+1 eps) [worker 2] episodes_seen=1240 last_return=-201.6 (+1 eps) [worker 0] episodes_seen=1250 last_return=-230.5 (+1 eps) [worker 1] episodes_seen=1260 last_return=-129.6 (+1 eps) [worker 2] episodes_seen=1250 last_return=-142.7 (+1 eps) [worker 0] episodes_seen=1260 last_return=-166.3 (+1 eps) [worker 1] episodes_seen=1270 last_return=-109.4 (+1 eps) [worker 2] episodes_seen=1260 last_return=-257.0 (+1 eps) [worker 0] episodes_seen=1270 last_return=-160.8 (+1 eps) [worker 1] episodes_seen=1280 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1270 last_return=-266.1 (+1 eps) [worker 0] episodes_seen=1280 last_return=-258.1 (+1 eps) [worker 1] episodes_seen=1290 last_return=-238.1 (+1 eps) [worker 0] episodes_seen=1290 last_return=-132.5 (+1 eps) [worker 2] episodes_seen=1280 last_return=-353.9 (+1 eps) [A2C][sync] it= 5005 steps= 300300 (+ 60) avg10=-185.38 loss=4380.493 pg=-0.000 vf=7300.821 H=0.000 gn=41944.641 [worker 1] episodes_seen=1300 last_return=-244.1 (+1 eps) [worker 2] episodes_seen=1290 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1310 last_return=-110.9 (+1 eps) [worker 0] episodes_seen=1300 last_return=-226.5 (+1 eps) [worker 2] episodes_seen=1300 last_return=-274.3 (+1 eps) [worker 1] episodes_seen=1320 last_return=-160.7 (+1 eps) [worker 0] episodes_seen=1310 last_return=-185.6 (+1 eps) [worker 2] episodes_seen=1310 last_return=-137.6 (+1 eps) [worker 0] episodes_seen=1320 last_return=-156.0 (+1 eps) [worker 1] episodes_seen=1330 last_return=-134.7 (+1 eps) [worker 0] episodes_seen=1330 last_return=-217.1 (+1 eps) [worker 2] episodes_seen=1320 last_return=-197.5 (+1 eps) [worker 1] episodes_seen=1340 last_return=-157.8 (+1 eps) [worker 0] episodes_seen=1340 last_return=-183.9 (+1 eps) [worker 2] episodes_seen=1330 last_return=-129.1 (+1 eps) [worker 1] episodes_seen=1350 last_return=-136.3 (+1 eps) [worker 0] episodes_seen=1350 last_return=-196.3 (+1 eps) [worker 1] episodes_seen=1360 last_return=-186.9 (+1 eps) 
[worker 2] episodes_seen=1340 last_return=-257.3 (+1 eps) [worker 1] episodes_seen=1370 last_return=-172.0 (+1 eps) [worker 0] episodes_seen=1360 last_return=-313.7 (+1 eps) [worker 2] episodes_seen=1350 last_return=-249.3 (+1 eps) [worker 1] episodes_seen=1380 last_return=-214.9 (+1 eps) [worker 2] episodes_seen=1360 last_return=-120.0 (+1 eps) [worker 0] episodes_seen=1370 last_return=-239.0 (+1 eps) [worker 1] episodes_seen=1390 last_return=-281.3 (+1 eps) [worker 2] episodes_seen=1370 last_return=-207.0 (+1 eps) [worker 0] episodes_seen=1380 last_return=-123.2 (+1 eps) [worker 1] episodes_seen=1400 last_return=-135.1 (+1 eps) [worker 2] episodes_seen=1380 last_return=-251.2 (+1 eps) [worker 0] episodes_seen=1390 last_return=-129.8 (+1 eps) [worker 1] episodes_seen=1410 last_return=-136.8 (+1 eps) [worker 0] episodes_seen=1400 last_return=-138.5 (+1 eps) [worker 2] episodes_seen=1390 last_return=-289.8 (+1 eps) [worker 1] episodes_seen=1420 last_return=-232.9 (+1 eps) [worker 1] episodes_seen=1430 last_return=-190.9 (+1 eps) [worker 0] episodes_seen=1410 last_return=-199.4 (+1 eps) [worker 2] episodes_seen=1400 last_return=-181.4 (+1 eps) [worker 1] episodes_seen=1440 last_return=-279.6 (+1 eps) [worker 0] episodes_seen=1420 last_return=-169.7 (+1 eps) [worker 2] episodes_seen=1410 last_return=-200.9 (+1 eps) [A2C][sync] it= 5839 steps= 350340 (+ 60) avg10=-177.49 loss=2729.683 pg=0.000 vf=4549.472 H=0.000 gn=31877.492 [worker 1] episodes_seen=1450 last_return=-155.5 (+1 eps) [worker 0] episodes_seen=1430 last_return=-210.0 (+1 eps) [worker 2] episodes_seen=1420 last_return=-191.7 (+1 eps) [worker 1] episodes_seen=1460 last_return=-223.9 (+1 eps) [worker 0] episodes_seen=1440 last_return=-163.6 (+1 eps) [worker 2] episodes_seen=1430 last_return=-294.7 (+1 eps) [worker 1] episodes_seen=1470 last_return=-183.7 (+1 eps) [worker 0] episodes_seen=1450 last_return=-220.4 (+1 eps) [worker 1] episodes_seen=1480 last_return=-231.3 (+1 eps) [worker 2] episodes_seen=1440 
last_return=-129.5 (+1 eps) [worker 0] episodes_seen=1460 last_return=-249.3 (+1 eps) [worker 1] episodes_seen=1490 last_return=-159.4 (+1 eps) [worker 2] episodes_seen=1450 last_return=-120.5 (+1 eps) [worker 1] episodes_seen=1500 last_return=-206.1 (+1 eps) [worker 0] episodes_seen=1470 last_return=-162.0 (+1 eps) [worker 2] episodes_seen=1460 last_return=-357.3 (+1 eps) [worker 1] episodes_seen=1510 last_return=-200.8 (+1 eps) [worker 0] episodes_seen=1480 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1470 last_return=-119.1 (+1 eps) [worker 1] episodes_seen=1520 last_return=-199.3 (+1 eps) [worker 0] episodes_seen=1490 last_return=-263.8 (+1 eps) [worker 2] episodes_seen=1480 last_return=-121.9 (+1 eps) [worker 1] episodes_seen=1530 last_return=-157.9 (+1 eps) [worker 0] episodes_seen=1500 last_return=-120.2 (+1 eps) [worker 2] episodes_seen=1490 last_return=-265.4 (+1 eps) [worker 1] episodes_seen=1540 last_return=-149.7 (+1 eps) [worker 0] episodes_seen=1510 last_return=-112.0 (+1 eps) [worker 2] episodes_seen=1500 last_return=-263.5 (+1 eps) [worker 1] episodes_seen=1550 last_return=-223.1 (+1 eps) [worker 0] episodes_seen=1520 last_return=-222.6 (+1 eps) [worker 2] episodes_seen=1510 last_return=-227.0 (+1 eps) [worker 1] episodes_seen=1560 last_return=-294.3 (+1 eps) [worker 0] episodes_seen=1530 last_return=-244.3 (+1 eps) [worker 1] episodes_seen=1570 last_return=-143.8 (+1 eps) [worker 2] episodes_seen=1520 last_return=-266.2 (+1 eps) [worker 0] episodes_seen=1540 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1530 last_return=-167.7 (+1 eps) [worker 0] episodes_seen=1550 last_return=-143.7 (+1 eps) [worker 1] episodes_seen=1580 last_return=-254.5 (+1 eps) [worker 2] episodes_seen=1540 last_return=-139.1 (+1 eps) [worker 0] episodes_seen=1560 last_return=-220.4 (+1 eps) [worker 1] episodes_seen=1590 last_return=-218.2 (+1 eps) [A2C][sync] it= 6673 steps= 400380 (+ 60) avg10=-182.42 loss=6559.751 pg=0.000 vf=10932.919 H=0.000 gn=55143.488 
[worker 2] episodes_seen=1550 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1570 last_return=-223.0 (+1 eps) [worker 1] episodes_seen=1600 last_return=-141.1 (+1 eps) [worker 2] episodes_seen=1560 last_return=-204.5 (+1 eps) [worker 0] episodes_seen=1580 last_return=-194.3 (+1 eps) [worker 1] episodes_seen=1610 last_return=-187.5 (+1 eps) [worker 0] episodes_seen=1590 last_return=-110.8 (+1 eps) [worker 2] episodes_seen=1570 last_return=-109.4 (+1 eps) [worker 1] episodes_seen=1620 last_return=-113.7 (+1 eps) [worker 0] episodes_seen=1600 last_return=-197.1 (+1 eps) [worker 2] episodes_seen=1580 last_return=-218.3 (+1 eps) [worker 1] episodes_seen=1630 last_return=-106.9 (+1 eps) [worker 0] episodes_seen=1610 last_return=-242.3 (+1 eps) [worker 2] episodes_seen=1590 last_return=-176.3 (+1 eps) [worker 1] episodes_seen=1640 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1620 last_return=-191.6 (+1 eps) [worker 1] episodes_seen=1650 last_return=-232.8 (+1 eps) [worker 2] episodes_seen=1600 last_return=-184.4 (+1 eps) [worker 0] episodes_seen=1630 last_return=-133.2 (+1 eps) [worker 1] episodes_seen=1660 last_return=-167.7 (+1 eps) [worker 2] episodes_seen=1610 last_return=-196.9 (+1 eps) [worker 0] episodes_seen=1640 last_return=-118.4 (+1 eps) [worker 1] episodes_seen=1670 last_return=-241.5 (+1 eps) [worker 2] episodes_seen=1620 last_return=-200.7 (+1 eps) [worker 0] episodes_seen=1650 last_return=-248.7 (+1 eps) [worker 1] episodes_seen=1680 last_return=-167.8 (+1 eps) [worker 2] episodes_seen=1630 last_return=-121.6 (+1 eps) [worker 0] episodes_seen=1660 last_return=-146.3 (+1 eps) [worker 2] episodes_seen=1640 last_return=-274.3 (+1 eps) [worker 1] episodes_seen=1690 last_return=-113.0 (+1 eps) [worker 1] episodes_seen=1700 last_return=-265.9 (+1 eps) [worker 0] episodes_seen=1670 last_return=-109.7 (+1 eps) [worker 2] episodes_seen=1650 last_return=-123.3 (+1 eps) [worker 0] episodes_seen=1680 last_return=-155.6 (+1 eps) [worker 1] 
episodes_seen=1710 last_return=-141.3 (+1 eps) [worker 2] episodes_seen=1660 last_return=-246.9 (+1 eps) [worker 0] episodes_seen=1690 last_return=-222.5 (+1 eps) [worker 1] episodes_seen=1720 last_return=-180.9 (+1 eps) [worker 2] episodes_seen=1670 last_return=-139.0 (+1 eps) [worker 1] episodes_seen=1730 last_return=-159.0 (+1 eps) [worker 0] episodes_seen=1700 last_return=-293.9 (+1 eps) [worker 2] episodes_seen=1680 last_return=-148.3 (+1 eps) [worker 1] episodes_seen=1740 last_return=-273.0 (+1 eps) [A2C][sync] it= 7507 steps= 450420 (+ 60) avg10=-188.46 loss=25034.562 pg=-0.166 vf=41724.566 H=1.186 gn=18963.734 [worker 0] episodes_seen=1710 last_return=-251.9 (+1 eps) [worker 2] episodes_seen=1690 last_return=-187.5 (+1 eps) [worker 1] episodes_seen=1750 last_return=-121.4 (+1 eps) [worker 0] episodes_seen=1720 last_return=-227.3 (+1 eps) [worker 2] episodes_seen=1700 last_return=-21.4 (+1 eps) [worker 1] episodes_seen=1760 last_return=-147.7 (+1 eps) [worker 0] episodes_seen=1730 last_return=-149.0 (+1 eps) [worker 2] episodes_seen=1710 last_return=-163.9 (+1 eps) [worker 1] episodes_seen=1770 last_return=-252.9 (+1 eps) [worker 0] episodes_seen=1740 last_return=-130.2 (+1 eps) [worker 2] episodes_seen=1720 last_return=-122.6 (+1 eps) [worker 1] episodes_seen=1780 last_return=-201.2 (+1 eps) [worker 2] episodes_seen=1730 last_return=-109.5 (+1 eps) [worker 0] episodes_seen=1750 last_return=-137.6 (+1 eps) [worker 1] episodes_seen=1790 last_return=-162.8 (+1 eps) [worker 0] episodes_seen=1760 last_return=-80.0 (+1 eps) [worker 2] episodes_seen=1740 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1800 last_return=-242.6 (+1 eps) [worker 0] episodes_seen=1770 last_return=-141.5 (+1 eps) [worker 2] episodes_seen=1750 last_return=-176.8 (+1 eps) [worker 1] episodes_seen=1810 last_return=-179.0 (+1 eps) [worker 2] episodes_seen=1760 last_return=-97.7 (+1 eps) [worker 0] episodes_seen=1780 last_return=-224.2 (+1 eps) [worker 1] episodes_seen=1820 
last_return=-111.7 (+1 eps) [worker 2] episodes_seen=1770 last_return=-113.0 (+1 eps) [worker 0] episodes_seen=1790 last_return=-239.5 (+1 eps) [worker 1] episodes_seen=1830 last_return=-109.6 (+1 eps) [worker 2] episodes_seen=1780 last_return=-163.0 (+1 eps) [worker 0] episodes_seen=1800 last_return=-139.3 (+1 eps) [worker 1] episodes_seen=1840 last_return=34.6 (+1 eps) [worker 0] episodes_seen=1810 last_return=-198.8 (+1 eps) [worker 2] episodes_seen=1790 last_return=-140.5 (+1 eps) [worker 1] episodes_seen=1850 last_return=-80.0 (+1 eps) [worker 0] episodes_seen=1820 last_return=-153.1 (+1 eps) [worker 2] episodes_seen=1800 last_return=-151.8 (+1 eps) [worker 1] episodes_seen=1860 last_return=-93.7 (+1 eps) [worker 0] episodes_seen=1830 last_return=-144.6 (+1 eps) [worker 2] episodes_seen=1810 last_return=-108.9 (+1 eps) [worker 0] episodes_seen=1840 last_return=-110.2 (+1 eps) [worker 1] episodes_seen=1870 last_return=-123.6 (+1 eps) [worker 2] episodes_seen=1820 last_return=-111.5 (+1 eps) [worker 0] episodes_seen=1850 last_return=-118.6 (+1 eps) [worker 1] episodes_seen=1880 last_return=-77.5 (+1 eps) [worker 2] episodes_seen=1830 last_return=-231.1 (+1 eps) [worker 0] episodes_seen=1860 last_return=-226.2 (+1 eps) [worker 2] episodes_seen=1840 last_return=-111.9 (+1 eps) [worker 1] episodes_seen=1890 last_return=-75.3 (+1 eps) [worker 0] episodes_seen=1870 last_return=-116.8 (+1 eps) [worker 1] episodes_seen=1900 last_return=-85.3 (+1 eps) [worker 2] episodes_seen=1850 last_return=-129.1 (+1 eps) [worker 1] episodes_seen=1910 last_return=-141.5 (+1 eps) [worker 0] episodes_seen=1880 last_return=-78.7 (+1 eps) [worker 2] episodes_seen=1860 last_return=-123.8 (+1 eps) [worker 1] episodes_seen=1920 last_return=-25.6 (+1 eps) [worker 0] episodes_seen=1890 last_return=-58.2 (+1 eps) [worker 2] episodes_seen=1870 last_return=-76.9 (+1 eps) [A2C][sync] it= 8341 steps= 500460 (+ 60) avg10= -78.56 loss=185.805 pg=-0.033 vf=309.747 H=0.974 gn=1750.215 [worker 1] 
episodes_seen=1930 last_return=-84.6 (+1 eps) [worker 0] episodes_seen=1900 last_return=-112.3 (+1 eps) [worker 2] episodes_seen=1880 last_return=-44.7 (+1 eps) [worker 1] episodes_seen=1940 last_return=-48.6 (+1 eps) [worker 0] episodes_seen=1910 last_return=-79.1 (+1 eps) [worker 0] episodes_seen=1920 last_return=-95.4 (+1 eps) [worker 1] episodes_seen=1950 last_return=-94.6 (+1 eps) [worker 2] episodes_seen=1890 last_return=-83.9 (+1 eps) [worker 0] episodes_seen=1930 last_return=-95.8 (+1 eps) [worker 2] episodes_seen=1900 last_return=-73.3 (+1 eps) [worker 1] episodes_seen=1960 last_return=-101.1 (+1 eps) [worker 0] episodes_seen=1940 last_return=-88.1 (+1 eps) [worker 2] episodes_seen=1910 last_return=-74.2 (+1 eps) [worker 1] episodes_seen=1970 last_return=-70.0 (+1 eps) [worker 0] episodes_seen=1950 last_return=-74.3 (+1 eps) [worker 2] episodes_seen=1920 last_return=-59.2 (+1 eps) [worker 2] episodes_seen=1930 last_return=-51.9 (+1 eps) [worker 0] episodes_seen=1960 last_return=-82.7 (+1 eps) [worker 1] episodes_seen=1980 last_return=-57.6 (+1 eps) [worker 2] episodes_seen=1940 last_return=-87.6 (+1 eps) [worker 1] episodes_seen=1990 last_return=-68.4 (+1 eps) [worker 0] episodes_seen=1970 last_return=-70.9 (+1 eps) [worker 2] episodes_seen=1950 last_return=-70.5 (+1 eps) [worker 1] episodes_seen=2000 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1980 last_return=-44.5 (+1 eps) [worker 2] episodes_seen=1960 last_return=-66.9 (+1 eps) [worker 1] episodes_seen=2010 last_return=-75.6 (+1 eps) [worker 0] episodes_seen=1990 last_return=-54.2 (+1 eps) [worker 2] episodes_seen=1970 last_return=-70.7 (+1 eps) [worker 1] episodes_seen=2020 last_return=-84.8 (+1 eps) [worker 0] episodes_seen=2000 last_return=-100.0 (+1 eps) [A2C][sync] it= 9175 steps= 550500 (+ 60) avg10= -74.85 loss=33.566 pg=-0.213 vf=56.314 H=0.965 gn=552.190 [worker 2] episodes_seen=1980 last_return=-36.9 (+1 eps) [worker 1] episodes_seen=2030 last_return=-93.8 (+1 eps) [worker 2] 
episodes_seen=1990 last_return=-67.7 (+1 eps) [worker 0] episodes_seen=2010 last_return=-10.0 (+1 eps) [worker 1] episodes_seen=2040 last_return=-77.4 (+1 eps) [worker 2] episodes_seen=2000 last_return=-77.6 (+1 eps) [worker 0] episodes_seen=2020 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2050 last_return=-56.3 (+1 eps) [worker 2] episodes_seen=2010 last_return=-38.3 (+1 eps) [worker 0] episodes_seen=2030 last_return=-49.2 (+1 eps) [worker 2] episodes_seen=2020 last_return=-90.5 (+1 eps) [worker 0] episodes_seen=2040 last_return=-58.6 (+1 eps) [worker 1] episodes_seen=2060 last_return=8.8 (+1 eps) [worker 2] episodes_seen=2030 last_return=-59.0 (+1 eps) [worker 0] episodes_seen=2050 last_return=-80.2 (+1 eps) [worker 2] episodes_seen=2040 last_return=-81.7 (+1 eps) [worker 0] episodes_seen=2060 last_return=-56.0 (+1 eps) [worker 1] episodes_seen=2070 last_return=-94.1 (+1 eps) [worker 0] episodes_seen=2070 last_return=-55.6 (+1 eps) [worker 1] episodes_seen=2080 last_return=-90.1 (+1 eps) [worker 2] episodes_seen=2050 last_return=-2.6 (+1 eps) [worker 0] episodes_seen=2080 last_return=-51.6 (+1 eps) [worker 0] episodes_seen=2090 last_return=-74.3 (+1 eps) [worker 1] episodes_seen=2090 last_return=-148.4 (+1 eps) [A2C][sync] it=10009 steps= 600540 (+ 60) avg10= -89.27 loss=63.418 pg=-0.040 vf=105.779 H=0.940 gn=483.338 [worker 2] episodes_seen=2060 last_return=-127.7 (+1 eps) [worker 0] episodes_seen=2100 last_return=-100.5 (+1 eps) [worker 1] episodes_seen=2100 last_return=-59.0 (+1 eps) [worker 2] episodes_seen=2070 last_return=-60.3 (+1 eps) [worker 0] episodes_seen=2110 last_return=-21.3 (+1 eps) [worker 1] episodes_seen=2110 last_return=14.7 (+1 eps) [worker 2] episodes_seen=2080 last_return=-64.8 (+1 eps) [worker 0] episodes_seen=2120 last_return=-46.8 (+1 eps) [worker 2] episodes_seen=2090 last_return=-42.9 (+1 eps) [worker 1] episodes_seen=2120 last_return=-45.7 (+1 eps) [worker 0] episodes_seen=2130 last_return=-100.0 (+1 eps) [worker 2] 
episodes_seen=2100 last_return=-82.4 (+1 eps) [worker 1] episodes_seen=2130 last_return=-97.7 (+1 eps) [worker 0] episodes_seen=2140 last_return=-93.0 (+1 eps) [worker 2] episodes_seen=2110 last_return=24.9 (+1 eps) [worker 1] episodes_seen=2140 last_return=-95.8 (+1 eps) [worker 0] episodes_seen=2150 last_return=-51.3 (+1 eps) [worker 2] episodes_seen=2120 last_return=-65.3 (+1 eps) [worker 1] episodes_seen=2150 last_return=-20.9 (+1 eps) [A2C][sync] it=10843 steps= 650580 (+ 60) avg10= -60.76 loss=17.376 pg=-0.052 vf=29.063 H=0.946 gn=242.530 [worker 1] episodes_seen=2160 last_return=-84.0 (+1 eps) [worker 0] episodes_seen=2160 last_return=-108.5 (+1 eps) [worker 2] episodes_seen=2130 last_return=-44.1 (+1 eps) [worker 1] episodes_seen=2170 last_return=-5.6 (+1 eps) [worker 2] episodes_seen=2140 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2170 last_return=-91.7 (+1 eps) [worker 1] episodes_seen=2180 last_return=-71.5 (+1 eps) [worker 2] episodes_seen=2150 last_return=-94.3 (+1 eps) [worker 0] episodes_seen=2180 last_return=-69.6 (+1 eps) [worker 1] episodes_seen=2190 last_return=-49.0 (+1 eps) [worker 2] episodes_seen=2160 last_return=-51.6 (+1 eps) [worker 0] episodes_seen=2190 last_return=-69.9 (+1 eps) [worker 2] episodes_seen=2170 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2200 last_return=-71.0 (+1 eps) [A2C][sync] it=11677 steps= 700620 (+ 60) avg10= -38.94 loss=11.172 pg=0.096 vf=18.476 H=0.968 gn=45.964 [worker 0] episodes_seen=2200 last_return=-33.5 (+1 eps) [worker 1] episodes_seen=2210 last_return=-109.5 (+1 eps) [worker 2] episodes_seen=2180 last_return=-23.3 (+1 eps) [worker 1] episodes_seen=2220 last_return=-58.6 (+1 eps) [worker 2] episodes_seen=2190 last_return=-73.9 (+1 eps) [worker 0] episodes_seen=2210 last_return=-79.8 (+1 eps) [worker 2] episodes_seen=2200 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2230 last_return=-89.5 (+1 eps) [worker 2] episodes_seen=2210 last_return=-166.7 (+1 eps) [worker 0] 
episodes_seen=2220 last_return=-4.6 (+1 eps) [worker 1] episodes_seen=2240 last_return=11.8 (+1 eps) [worker 2] episodes_seen=2220 last_return=-100.7 (+1 eps) [worker 0] episodes_seen=2230 last_return=-93.8 (+1 eps) [worker 2] episodes_seen=2230 last_return=-174.5 (+1 eps) [worker 1] episodes_seen=2250 last_return=-98.2 (+1 eps) [worker 2] episodes_seen=2240 last_return=-64.5 (+1 eps) [worker 0] episodes_seen=2240 last_return=-92.0 (+1 eps) [A2C][sync] it=12511 steps= 750660 (+ 60) avg10= -70.21 loss=21.772 pg=-0.127 vf=36.515 H=0.940 gn=469.176 [worker 1] episodes_seen=2260 last_return=-51.9 (+1 eps) [worker 2] episodes_seen=2250 last_return=-160.1 (+1 eps) [worker 0] episodes_seen=2250 last_return=-62.3 (+1 eps) [worker 1] episodes_seen=2270 last_return=-98.6 (+1 eps) [worker 2] episodes_seen=2260 last_return=-75.2 (+1 eps) [worker 1] episodes_seen=2280 last_return=-106.7 (+1 eps) [worker 0] episodes_seen=2260 last_return=-4.6 (+1 eps) [worker 2] episodes_seen=2270 last_return=-121.7 (+1 eps) [worker 2] episodes_seen=2280 last_return=-100.5 (+1 eps) [worker 1] episodes_seen=2290 last_return=-103.9 (+1 eps) [worker 0] episodes_seen=2270 last_return=-104.6 (+1 eps) [worker 2] episodes_seen=2290 last_return=-90.1 (+1 eps) [worker 1] episodes_seen=2300 last_return=-139.5 (+1 eps) [worker 0] episodes_seen=2280 last_return=-25.3 (+1 eps) [worker 2] episodes_seen=2300 last_return=-68.1 (+1 eps) [A2C][sync] it=13345 steps= 800700 (+ 60) avg10= -52.96 loss=22.482 pg=-0.084 vf=37.624 H=0.901 gn=224.496 [worker 1] episodes_seen=2310 last_return=-73.8 (+1 eps) [worker 2] episodes_seen=2310 last_return=-94.4 (+1 eps) [worker 0] episodes_seen=2290 last_return=-46.3 (+1 eps) [worker 1] episodes_seen=2320 last_return=-135.5 (+1 eps) [worker 0] episodes_seen=2300 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=2320 last_return=-125.4 (+1 eps) [worker 1] episodes_seen=2330 last_return=-1.5 (+1 eps) [worker 2] episodes_seen=2330 last_return=-157.5 (+1 eps) [worker 0] 
episodes_seen=2310 last_return=-61.6 (+1 eps) [worker 1] episodes_seen=2340 last_return=-189.9 (+1 eps) [worker 2] episodes_seen=2340 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2320 last_return=23.7 (+1 eps) [worker 1] episodes_seen=2350 last_return=-55.5 (+1 eps) [worker 2] episodes_seen=2350 last_return=-1.6 (+1 eps) [worker 0] episodes_seen=2330 last_return=-66.0 (+1 eps) [worker 1] episodes_seen=2360 last_return=-57.8 (+1 eps) [worker 2] episodes_seen=2360 last_return=-80.6 (+1 eps) [worker 0] episodes_seen=2340 last_return=-67.5 (+1 eps) [A2C][sync] it=14179 steps= 850740 (+ 60) avg10= -57.96 loss=76.730 pg=-0.055 vf=127.986 H=0.648 gn=699.853 [worker 2] episodes_seen=2370 last_return=-80.6 (+1 eps) [worker 1] episodes_seen=2370 last_return=-58.8 (+1 eps) [worker 0] episodes_seen=2350 last_return=-53.1 (+1 eps) [worker 2] episodes_seen=2380 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2380 last_return=-70.8 (+1 eps) [worker 0] episodes_seen=2360 last_return=-192.1 (+1 eps) [worker 1] episodes_seen=2390 last_return=-0.5 (+1 eps) [worker 2] episodes_seen=2390 last_return=-26.0 (+1 eps) [worker 0] episodes_seen=2370 last_return=-50.3 (+1 eps) [worker 1] episodes_seen=2400 last_return=-73.7 (+1 eps) [worker 2] episodes_seen=2400 last_return=6.2 (+1 eps) [worker 1] episodes_seen=2410 last_return=-159.4 (+1 eps) [worker 0] episodes_seen=2380 last_return=-5.7 (+1 eps) [worker 2] episodes_seen=2410 last_return=45.6 (+1 eps) [worker 1] episodes_seen=2420 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=2420 last_return=-71.5 (+1 eps) [worker 0] episodes_seen=2390 last_return=11.0 (+1 eps) [A2C][sync] it=15013 steps= 900780 (+ 60) avg10= -66.81 loss=12.227 pg=-0.067 vf=20.507 H=0.997 gn=171.142 [worker 1] episodes_seen=2430 last_return=-55.8 (+1 eps) [worker 2] episodes_seen=2430 last_return=-94.4 (+1 eps) [worker 0] episodes_seen=2400 last_return=-74.4 (+1 eps) [worker 1] episodes_seen=2440 last_return=-2.4 (+1 eps) [worker 2] episodes_seen=2440 
last_return=-66.8 (+1 eps) [worker 0] episodes_seen=2410 last_return=-11.3 (+1 eps) [worker 1] episodes_seen=2450 last_return=-37.1 (+1 eps) [worker 2] episodes_seen=2450 last_return=14.6 (+1 eps) [worker 0] episodes_seen=2420 last_return=-68.0 (+1 eps) [worker 1] episodes_seen=2460 last_return=7.0 (+1 eps) [worker 0] episodes_seen=2430 last_return=-69.7 (+1 eps) [worker 2] episodes_seen=2460 last_return=-56.3 (+1 eps) [worker 0] episodes_seen=2440 last_return=6.4 (+1 eps) [worker 2] episodes_seen=2470 last_return=-63.4 (+1 eps) [A2C][sync] it=15847 steps= 950820 (+ 60) avg10= -73.25 loss=3.919 pg=0.028 vf=6.500 H=0.861 gn=186.545 [worker 1] episodes_seen=2470 last_return=-23.4 (+1 eps) [worker 0] episodes_seen=2450 last_return=-73.6 (+1 eps) [worker 2] episodes_seen=2480 last_return=-94.7 (+1 eps) [worker 0] episodes_seen=2460 last_return=-70.1 (+1 eps) [worker 1] episodes_seen=2480 last_return=-1.1 (+1 eps) [worker 2] episodes_seen=2490 last_return=-16.5 (+1 eps) [worker 0] episodes_seen=2470 last_return=-218.8 (+1 eps) [worker 1] episodes_seen=2490 last_return=-73.2 (+1 eps) [worker 2] episodes_seen=2500 last_return=-70.7 (+1 eps) [worker 0] episodes_seen=2480 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2500 last_return=-68.4 (+1 eps) [worker 2] episodes_seen=2510 last_return=-95.8 (+1 eps) [worker 0] episodes_seen=2490 last_return=-98.9 (+1 eps) [worker 2] episodes_seen=2520 last_return=9.0 (+1 eps) [worker 1] episodes_seen=2510 last_return=-101.4 (+1 eps) [worker 0] episodes_seen=2500 last_return=-120.3 (+1 eps) [A2C][sync] it=16667 steps= 1000020 (+ 60) avg10= -86.72 loss=35.780 pg=0.004 vf=59.640 H=0.810 gn=426.432 [Checkpoint] Saved self-describing checkpoint → part2_artifacts/checkpoints/a2c_run15_seed1227.pth [A2C][sync] done: steps=1000020 time=965.3s avg10=-86.72
[Run run15_seed1227] checkpoint: part2_artifacts/checkpoints/a2c_run15_seed1227.pth [Run run15_seed1227] training plot (tail 500): part2_artifacts/train_curve_run15_seed1227.png [Run run15_seed1227] training plot (full): part2_artifacts/train_curve_full_run15_seed1227.png [Run run15_seed1227] per-worker plot: part2_artifacts/train_curve_workers_run15_seed1227.png [Run run15_seed1227] workers-average plot: part2_artifacts/train_curve_workers_avg_run15_seed1227.png
[Eval run15_seed1227] mean=-71.37 std=216.66 min=-534.27 max=67.99 [Eval run15_seed1227] CSV: part2_artifacts/eval10_run15_seed1227.csv [Eval run15_seed1227] plot: part2_artifacts/eval10_run15_seed1227.png [Best] ep=1 return=67.99 seed=1228
/usr/local/lib/python3.12/dist-packages/gymnasium/wrappers/rendering.py:293: UserWarning: WARN: Overwriting existing videos at /content/part2_artifacts/videos/run15_seed1227 folder (try specifying a different `video_folder` for the `RecordVideo` wrapper if this is not desired)
logger.warn(
[Video run15_seed1227] episode return=67.99 [Video run15_seed1227] saved under: part2_artifacts/videos run15_seed1227 | mean=-71.4±216.7 | best_ep=1, best_ret=68.0
Run#16
# Run 16: train with 4 workers for 1.2M environment steps, then evaluate the
# saved checkpoint and record a video from it.
run_id = f"run16_seed{SEED}"

# All hyperparameters for this run live in one mapping so they are easy to
# compare against other runs; they are forwarded to train_once unchanged.
# NOTE(review): the captured output of this cell reports H=0.001 at ~60k steps
# and H=0.000 afterwards, with vf/gn growing very large — worth revisiting
# these settings before reusing them.
run16_hparams = {
    "n_workers": 4,
    "total_env_steps": 1_200_000,
    "T": 20,
    "gamma": 0.995,
    "entropy_coef": 0.015,
    "value_coef": 0.55,
    "max_grad_norm": 0.5,
    "lr": 3e-4,
    "log_every": 60_000,
}
model, logs, paths = train_once(run_id=run_id, **run16_hparams)

# Evaluate the checkpoint written by training; only the metrics mapping
# (read below via its 'mean' and 'std' keys) is kept from the returned pair.
metrics, _ = evaluate_10(run_id, paths.ckpt_path)

# Record a video from the same checkpoint; returns the output directory plus
# the index and return of the episode reported as best.
video_dir, best_idx, best_ret = record_best_from_eval(run_id, paths.ckpt_path)

# One-line summary of the run.
summary = f"{run_id} | mean={metrics['mean']:.1f}±{metrics['std']:.1f} | best_ep={best_idx}, best_ret={best_ret:.1f}"
print(summary)
[Run run16_seed1227] starting training… [A2C][sync] start: workers=4, T=20, target_steps=1200000, mp=fork [A2C][sync] it= 1 steps= 80 (+ 80) avg10= nan loss=54.251 pg=-0.000 vf=98.677 H=1.386 gn=19.808 [worker 3] episodes_seen=10 last_return=-101.4 (+1 eps) [worker 1] episodes_seen=10 last_return=-121.5 (+1 eps) [worker 0] episodes_seen=10 last_return=-117.7 (+1 eps) [worker 2] episodes_seen=10 last_return=-75.3 (+1 eps) [worker 0] episodes_seen=20 last_return=-290.3 (+1 eps) [worker 1] episodes_seen=20 last_return=-115.1 (+1 eps) [worker 3] episodes_seen=20 last_return=-211.1 (+1 eps) [worker 2] episodes_seen=20 last_return=-215.0 (+1 eps) [worker 0] episodes_seen=30 last_return=-146.9 (+1 eps) [worker 1] episodes_seen=30 last_return=-168.2 (+1 eps) [worker 3] episodes_seen=30 last_return=-219.8 (+1 eps) [worker 2] episodes_seen=30 last_return=-119.0 (+1 eps) [worker 0] episodes_seen=40 last_return=-186.3 (+1 eps) [worker 1] episodes_seen=40 last_return=-150.3 (+1 eps) [worker 2] episodes_seen=40 last_return=-119.9 (+1 eps) [worker 3] episodes_seen=40 last_return=-121.8 (+1 eps) [worker 0] episodes_seen=50 last_return=-152.4 (+1 eps) [worker 2] episodes_seen=50 last_return=-225.7 (+1 eps) [worker 1] episodes_seen=50 last_return=-109.2 (+1 eps) [worker 3] episodes_seen=50 last_return=-235.7 (+1 eps) [worker 0] episodes_seen=60 last_return=-128.1 (+1 eps) [worker 1] episodes_seen=60 last_return=-171.5 (+1 eps) [worker 2] episodes_seen=60 last_return=-175.6 (+1 eps) [worker 3] episodes_seen=60 last_return=-250.7 (+1 eps) [worker 0] episodes_seen=70 last_return=-213.5 (+1 eps) [worker 1] episodes_seen=70 last_return=-226.6 (+1 eps) [worker 2] episodes_seen=70 last_return=-140.1 (+1 eps) [worker 3] episodes_seen=70 last_return=-141.1 (+1 eps) [worker 0] episodes_seen=80 last_return=-186.7 (+1 eps) [worker 1] episodes_seen=80 last_return=-253.2 (+1 eps) [worker 2] episodes_seen=80 last_return=-118.4 (+1 eps) [worker 3] episodes_seen=80 last_return=-325.0 (+1 eps) 
[worker 0] episodes_seen=90 last_return=-136.8 (+1 eps) [worker 1] episodes_seen=90 last_return=-170.2 (+1 eps) [worker 2] episodes_seen=90 last_return=-211.4 (+1 eps) [worker 3] episodes_seen=90 last_return=-157.9 (+1 eps) [worker 1] episodes_seen=100 last_return=-188.4 (+1 eps) [worker 0] episodes_seen=100 last_return=-148.2 (+1 eps) [worker 3] episodes_seen=100 last_return=-217.6 (+1 eps) [worker 2] episodes_seen=100 last_return=-142.6 (+1 eps) [worker 1] episodes_seen=110 last_return=-132.8 (+1 eps) [worker 3] episodes_seen=110 last_return=-257.9 (+1 eps) [worker 0] episodes_seen=110 last_return=-252.0 (+1 eps) [worker 2] episodes_seen=110 last_return=-252.5 (+1 eps) [worker 1] episodes_seen=120 last_return=-119.5 (+1 eps) [worker 0] episodes_seen=120 last_return=-199.5 (+1 eps) [worker 3] episodes_seen=120 last_return=-201.1 (+1 eps) [A2C][sync] it= 751 steps= 60080 (+ 80) avg10=-197.56 loss=12600.537 pg=0.000 vf=22910.066 H=0.001 gn=12918.605 [worker 2] episodes_seen=120 last_return=-256.5 (+1 eps) [worker 1] episodes_seen=130 last_return=-125.9 (+1 eps) [worker 3] episodes_seen=130 last_return=-142.1 (+1 eps) [worker 0] episodes_seen=130 last_return=-180.2 (+1 eps) [worker 2] episodes_seen=130 last_return=-122.0 (+1 eps) [worker 3] episodes_seen=140 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=140 last_return=-163.4 (+1 eps) [worker 0] episodes_seen=140 last_return=-174.2 (+1 eps) [worker 3] episodes_seen=150 last_return=-233.8 (+1 eps) [worker 2] episodes_seen=140 last_return=-117.1 (+1 eps) [worker 0] episodes_seen=150 last_return=-156.3 (+1 eps) [worker 1] episodes_seen=150 last_return=-144.1 (+1 eps) [worker 2] episodes_seen=150 last_return=-198.9 (+1 eps) [worker 3] episodes_seen=160 last_return=-125.9 (+1 eps) [worker 0] episodes_seen=160 last_return=-234.2 (+1 eps) [worker 1] episodes_seen=160 last_return=-248.9 (+1 eps) [worker 2] episodes_seen=160 last_return=-281.6 (+1 eps) [worker 0] episodes_seen=170 last_return=-108.2 (+1 eps) [worker 3] 
episodes_seen=170 last_return=-181.7 (+1 eps) [worker 1] episodes_seen=170 last_return=-222.8 (+1 eps) [worker 2] episodes_seen=170 last_return=-183.9 (+1 eps) [worker 1] episodes_seen=180 last_return=-247.1 (+1 eps) [worker 3] episodes_seen=180 last_return=-290.0 (+1 eps) [worker 0] episodes_seen=180 last_return=-113.7 (+1 eps) [worker 2] episodes_seen=180 last_return=-168.0 (+1 eps) [worker 1] episodes_seen=190 last_return=-147.6 (+1 eps) [worker 3] episodes_seen=190 last_return=-173.1 (+1 eps) [worker 0] episodes_seen=190 last_return=-141.6 (+1 eps) [worker 2] episodes_seen=190 last_return=-233.1 (+1 eps) [worker 0] episodes_seen=200 last_return=-168.2 (+1 eps) [worker 1] episodes_seen=200 last_return=-115.4 (+1 eps) [worker 3] episodes_seen=200 last_return=-110.7 (+1 eps) [worker 2] episodes_seen=200 last_return=-121.4 (+1 eps) [worker 0] episodes_seen=210 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=210 last_return=-120.5 (+1 eps) [worker 3] episodes_seen=210 last_return=-148.0 (+1 eps) [worker 0] episodes_seen=220 last_return=-160.1 (+1 eps) [worker 2] episodes_seen=210 last_return=-174.4 (+1 eps) [worker 3] episodes_seen=220 last_return=-131.6 (+1 eps) [worker 1] episodes_seen=220 last_return=-224.9 (+1 eps) [worker 2] episodes_seen=220 last_return=-118.2 (+1 eps) [worker 0] episodes_seen=230 last_return=-120.6 (+1 eps) [worker 3] episodes_seen=230 last_return=-126.5 (+1 eps) [worker 1] episodes_seen=230 last_return=-199.1 (+1 eps) [worker 2] episodes_seen=230 last_return=-278.5 (+1 eps) [worker 0] episodes_seen=240 last_return=-131.8 (+1 eps) [worker 3] episodes_seen=240 last_return=-193.8 (+1 eps) [worker 1] episodes_seen=240 last_return=-210.6 (+1 eps) [worker 0] episodes_seen=250 last_return=-206.2 (+1 eps) [worker 2] episodes_seen=240 last_return=-132.2 (+1 eps) [worker 3] episodes_seen=250 last_return=-256.6 (+1 eps) [A2C][sync] it= 1501 steps= 120080 (+ 80) avg10=-216.69 loss=7908.259 pg=-0.000 vf=14378.653 H=0.000 gn=51620.969 [worker 1] 
episodes_seen=250 last_return=-183.6 (+1 eps) [worker 2] episodes_seen=250 last_return=-112.4 (+1 eps) [worker 3] episodes_seen=260 last_return=-201.9 (+1 eps) [worker 0] episodes_seen=260 last_return=-111.7 (+1 eps) [worker 1] episodes_seen=260 last_return=-114.2 (+1 eps) [worker 2] episodes_seen=260 last_return=-211.4 (+1 eps) [worker 0] episodes_seen=270 last_return=-123.5 (+1 eps) [worker 3] episodes_seen=270 last_return=-137.1 (+1 eps) [worker 1] episodes_seen=270 last_return=-129.7 (+1 eps) [worker 2] episodes_seen=270 last_return=-106.8 (+1 eps) [worker 3] episodes_seen=280 last_return=-177.0 (+1 eps) [worker 0] episodes_seen=280 last_return=-134.3 (+1 eps) [worker 1] episodes_seen=280 last_return=-169.7 (+1 eps) [worker 2] episodes_seen=280 last_return=-189.6 (+1 eps) [worker 3] episodes_seen=290 last_return=-235.8 (+1 eps) [worker 0] episodes_seen=290 last_return=-238.9 (+1 eps) [worker 1] episodes_seen=290 last_return=-170.5 (+1 eps) [worker 3] episodes_seen=300 last_return=-133.6 (+1 eps) [worker 2] episodes_seen=290 last_return=-234.4 (+1 eps) [worker 0] episodes_seen=300 last_return=-177.5 (+1 eps) [worker 3] episodes_seen=310 last_return=-166.5 (+1 eps) [worker 1] episodes_seen=300 last_return=-204.5 (+1 eps) [worker 2] episodes_seen=300 last_return=-195.9 (+1 eps) [worker 0] episodes_seen=310 last_return=-212.8 (+1 eps) [worker 3] episodes_seen=320 last_return=-134.5 (+1 eps) [worker 1] episodes_seen=310 last_return=-245.6 (+1 eps) [worker 2] episodes_seen=310 last_return=-117.2 (+1 eps) [worker 0] episodes_seen=320 last_return=-105.2 (+1 eps) [worker 3] episodes_seen=330 last_return=-124.7 (+1 eps) [worker 1] episodes_seen=320 last_return=-209.0 (+1 eps) [worker 2] episodes_seen=320 last_return=-107.8 (+1 eps) [worker 0] episodes_seen=330 last_return=-232.2 (+1 eps) [worker 1] episodes_seen=330 last_return=-154.4 (+1 eps) [worker 3] episodes_seen=340 last_return=-221.7 (+1 eps) [worker 2] episodes_seen=330 last_return=-263.7 (+1 eps) [worker 1] 
episodes_seen=340 last_return=-122.8 (+1 eps) [worker 0] episodes_seen=340 last_return=-231.1 (+1 eps) [worker 3] episodes_seen=350 last_return=-168.1 (+1 eps) [worker 2] episodes_seen=340 last_return=-188.5 (+1 eps) [worker 0] episodes_seen=350 last_return=-159.6 (+1 eps) [worker 1] episodes_seen=350 last_return=-244.3 (+1 eps) [worker 3] episodes_seen=360 last_return=-164.9 (+1 eps) [worker 2] episodes_seen=350 last_return=-237.0 (+1 eps) [worker 1] episodes_seen=360 last_return=-216.0 (+1 eps) [worker 0] episodes_seen=360 last_return=-176.8 (+1 eps) [worker 3] episodes_seen=370 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=360 last_return=-110.1 (+1 eps) [worker 1] episodes_seen=370 last_return=-230.0 (+1 eps) [worker 3] episodes_seen=380 last_return=-224.7 (+1 eps) [A2C][sync] it= 2251 steps= 180080 (+ 80) avg10=-225.57 loss=1874048.875 pg=-0.000 vf=3407361.500 H=0.000 gn=1236043.250 [worker 2] episodes_seen=370 last_return=-111.5 (+1 eps) [worker 0] episodes_seen=370 last_return=-395.2 (+1 eps) [worker 1] episodes_seen=380 last_return=-163.1 (+1 eps) [worker 3] episodes_seen=390 last_return=-292.3 (+1 eps) [worker 2] episodes_seen=380 last_return=-257.0 (+1 eps) [worker 0] episodes_seen=380 last_return=-181.4 (+1 eps) [worker 1] episodes_seen=390 last_return=-217.5 (+1 eps) [worker 3] episodes_seen=400 last_return=-168.3 (+1 eps) [worker 2] episodes_seen=390 last_return=-281.3 (+1 eps) [worker 0] episodes_seen=390 last_return=-212.1 (+1 eps) [worker 1] episodes_seen=400 last_return=-150.6 (+1 eps) [worker 3] episodes_seen=410 last_return=-224.5 (+1 eps) [worker 2] episodes_seen=400 last_return=-117.3 (+1 eps) [worker 0] episodes_seen=400 last_return=-145.2 (+1 eps) [worker 1] episodes_seen=410 last_return=-140.8 (+1 eps) [worker 2] episodes_seen=410 last_return=-115.8 (+1 eps) [worker 3] episodes_seen=420 last_return=-115.5 (+1 eps) [worker 0] episodes_seen=410 last_return=-246.8 (+1 eps) [worker 1] episodes_seen=420 last_return=-182.2 (+1 eps) [worker 
2] episodes_seen=420 last_return=-138.7 (+1 eps) [worker 3] episodes_seen=430 last_return=-115.2 (+1 eps) [worker 0] episodes_seen=420 last_return=-129.7 (+1 eps) [worker 1] episodes_seen=430 last_return=-124.1 (+1 eps) [worker 3] episodes_seen=440 last_return=-152.8 (+1 eps) [worker 2] episodes_seen=430 last_return=-218.5 (+1 eps) [worker 0] episodes_seen=430 last_return=-232.6 (+1 eps) [worker 1] episodes_seen=440 last_return=-243.6 (+1 eps) [worker 2] episodes_seen=440 last_return=-256.5 (+1 eps) [worker 3] episodes_seen=450 last_return=-181.2 (+1 eps) [worker 0] episodes_seen=440 last_return=-172.6 (+1 eps) [worker 1] episodes_seen=450 last_return=-249.5 (+1 eps) [worker 3] episodes_seen=460 last_return=-214.5 (+1 eps) [worker 2] episodes_seen=450 last_return=-120.5 (+1 eps) [worker 0] episodes_seen=450 last_return=-130.4 (+1 eps) [worker 1] episodes_seen=460 last_return=-165.4 (+1 eps) [worker 3] episodes_seen=470 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=460 last_return=-113.4 (+1 eps) [worker 0] episodes_seen=460 last_return=-148.2 (+1 eps) [worker 3] episodes_seen=480 last_return=-256.4 (+1 eps) [worker 1] episodes_seen=470 last_return=-245.9 (+1 eps) [worker 2] episodes_seen=470 last_return=-235.1 (+1 eps) [worker 0] episodes_seen=470 last_return=-266.9 (+1 eps) [worker 3] episodes_seen=490 last_return=-176.5 (+1 eps) [worker 1] episodes_seen=480 last_return=-119.4 (+1 eps) [worker 2] episodes_seen=480 last_return=-128.1 (+1 eps) [worker 0] episodes_seen=480 last_return=-273.0 (+1 eps) [worker 3] episodes_seen=500 last_return=-167.1 (+1 eps) [worker 1] episodes_seen=490 last_return=-134.5 (+1 eps) [worker 2] episodes_seen=490 last_return=-179.8 (+1 eps) [A2C][sync] it= 3001 steps= 240080 (+ 80) avg10=-189.92 loss=49677.555 pg=-0.000 vf=90322.828 H=0.000 gn=375587.219 [worker 0] episodes_seen=490 last_return=-215.9 (+1 eps) [worker 1] episodes_seen=500 last_return=-113.3 (+1 eps) [worker 3] episodes_seen=510 last_return=-115.1 (+1 eps) [worker 2] 
episodes_seen=500 last_return=-222.2 (+1 eps) [worker 0] episodes_seen=500 last_return=-149.2 (+1 eps) [worker 1] episodes_seen=510 last_return=-219.1 (+1 eps) [worker 3] episodes_seen=520 last_return=-157.6 (+1 eps) [worker 2] episodes_seen=510 last_return=-110.5 (+1 eps) [worker 0] episodes_seen=510 last_return=-160.5 (+1 eps) [worker 1] episodes_seen=520 last_return=-236.0 (+1 eps) [worker 3] episodes_seen=530 last_return=-158.8 (+1 eps) [worker 2] episodes_seen=520 last_return=-113.5 (+1 eps) [worker 0] episodes_seen=520 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=540 last_return=-105.9 (+1 eps) [worker 2] episodes_seen=530 last_return=-353.3 (+1 eps) [worker 0] episodes_seen=530 last_return=-149.3 (+1 eps) [worker 1] episodes_seen=530 last_return=-305.4 (+1 eps) [worker 2] episodes_seen=540 last_return=-180.7 (+1 eps) [worker 3] episodes_seen=550 last_return=-276.3 (+1 eps) [worker 0] episodes_seen=540 last_return=-145.6 (+1 eps) [worker 1] episodes_seen=540 last_return=-279.3 (+1 eps) [worker 0] episodes_seen=550 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=550 last_return=-158.8 (+1 eps) [worker 3] episodes_seen=560 last_return=-184.1 (+1 eps) [worker 1] episodes_seen=550 last_return=-219.1 (+1 eps) [worker 3] episodes_seen=570 last_return=-173.6 (+1 eps) [worker 2] episodes_seen=560 last_return=-133.5 (+1 eps) [worker 0] episodes_seen=560 last_return=-201.1 (+1 eps) [worker 1] episodes_seen=560 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=580 last_return=-192.5 (+1 eps) [worker 2] episodes_seen=570 last_return=-264.9 (+1 eps) [worker 0] episodes_seen=570 last_return=-106.1 (+1 eps) [worker 1] episodes_seen=570 last_return=-158.9 (+1 eps) [worker 3] episodes_seen=590 last_return=-120.7 (+1 eps) [worker 0] episodes_seen=580 last_return=-209.3 (+1 eps) [worker 2] episodes_seen=580 last_return=-175.4 (+1 eps) [worker 1] episodes_seen=580 last_return=-231.2 (+1 eps) [worker 0] episodes_seen=590 last_return=-257.2 (+1 eps) [worker 3] 
episodes_seen=600 last_return=-212.5 (+1 eps) [worker 1] episodes_seen=590 last_return=-111.2 (+1 eps) [worker 2] episodes_seen=590 last_return=-243.0 (+1 eps) [worker 0] episodes_seen=600 last_return=-213.1 (+1 eps) [worker 3] episodes_seen=610 last_return=-204.6 (+1 eps) [worker 1] episodes_seen=600 last_return=-219.7 (+1 eps) [worker 2] episodes_seen=600 last_return=-206.5 (+1 eps) [worker 0] episodes_seen=610 last_return=-215.8 (+1 eps) [worker 3] episodes_seen=620 last_return=-120.2 (+1 eps) [worker 1] episodes_seen=610 last_return=-146.0 (+1 eps) [worker 2] episodes_seen=610 last_return=-217.2 (+1 eps) [A2C][sync] it= 3751 steps= 300080 (+ 80) avg10=-202.99 loss=1736638.625 pg=-0.000 vf=3157524.750 H=0.000 gn=388046.938 [worker 0] episodes_seen=620 last_return=-170.2 (+1 eps) [worker 3] episodes_seen=630 last_return=-151.8 (+1 eps) [worker 1] episodes_seen=620 last_return=-299.5 (+1 eps) [worker 2] episodes_seen=620 last_return=-159.1 (+1 eps) [worker 0] episodes_seen=630 last_return=-151.4 (+1 eps) [worker 3] episodes_seen=640 last_return=-281.7 (+1 eps) [worker 1] episodes_seen=630 last_return=-206.5 (+1 eps) [worker 2] episodes_seen=630 last_return=-177.4 (+1 eps) [worker 3] episodes_seen=650 last_return=-133.6 (+1 eps) [worker 0] episodes_seen=640 last_return=-203.7 (+1 eps) [worker 1] episodes_seen=640 last_return=-216.9 (+1 eps) [worker 2] episodes_seen=640 last_return=-222.9 (+1 eps) [worker 3] episodes_seen=660 last_return=-227.6 (+1 eps) [worker 0] episodes_seen=650 last_return=-155.1 (+1 eps) [worker 1] episodes_seen=650 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=650 last_return=-132.3 (+1 eps) [worker 3] episodes_seen=670 last_return=-260.1 (+1 eps) [worker 0] episodes_seen=660 last_return=-118.0 (+1 eps) [worker 1] episodes_seen=660 last_return=-264.7 (+1 eps) [worker 2] episodes_seen=660 last_return=-242.4 (+1 eps) [worker 0] episodes_seen=670 last_return=-177.1 (+1 eps) [worker 3] episodes_seen=680 last_return=-179.1 (+1 eps) [worker 
1] episodes_seen=670 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=670 last_return=-171.9 (+1 eps) [worker 0] episodes_seen=680 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=690 last_return=-201.4 (+1 eps) [worker 1] episodes_seen=680 last_return=-240.1 (+1 eps) [worker 2] episodes_seen=680 last_return=-158.4 (+1 eps) [worker 0] episodes_seen=690 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=700 last_return=-125.4 (+1 eps) [worker 1] episodes_seen=690 last_return=-223.0 (+1 eps) [worker 0] episodes_seen=700 last_return=-109.8 (+1 eps) [worker 2] episodes_seen=690 last_return=-155.6 (+1 eps) [worker 3] episodes_seen=710 last_return=-107.8 (+1 eps) [worker 1] episodes_seen=700 last_return=-153.9 (+1 eps) [worker 0] episodes_seen=710 last_return=-160.5 (+1 eps) [worker 3] episodes_seen=720 last_return=-168.5 (+1 eps) [worker 2] episodes_seen=700 last_return=-143.2 (+1 eps) [worker 1] episodes_seen=710 last_return=-180.9 (+1 eps) [worker 0] episodes_seen=720 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=730 last_return=-128.7 (+1 eps) [worker 2] episodes_seen=710 last_return=-201.0 (+1 eps) [worker 1] episodes_seen=720 last_return=-106.8 (+1 eps) [worker 0] episodes_seen=730 last_return=-130.6 (+1 eps) [worker 2] episodes_seen=720 last_return=-273.3 (+1 eps) [worker 3] episodes_seen=740 last_return=-193.3 (+1 eps) [worker 1] episodes_seen=730 last_return=-244.3 (+1 eps) [worker 3] episodes_seen=750 last_return=-123.0 (+1 eps) [worker 2] episodes_seen=730 last_return=-240.1 (+1 eps) [worker 0] episodes_seen=740 last_return=-109.8 (+1 eps) [worker 1] episodes_seen=740 last_return=-190.7 (+1 eps) [A2C][sync] it= 4501 steps= 360080 (+ 80) avg10=-187.21 loss=85926.547 pg=-0.000 vf=156230.078 H=0.000 gn=728712.188 [worker 3] episodes_seen=760 last_return=-215.7 (+1 eps) [worker 2] episodes_seen=740 last_return=-270.7 (+1 eps) [worker 0] episodes_seen=750 last_return=-181.0 (+1 eps) [worker 1] episodes_seen=750 last_return=-105.5 (+1 eps) [worker 
2] episodes_seen=750 last_return=-184.1 (+1 eps) [worker 0] episodes_seen=760 last_return=-268.1 (+1 eps) [worker 1] episodes_seen=760 last_return=-113.0 (+1 eps) [worker 3] episodes_seen=770 last_return=-252.8 (+1 eps) [worker 2] episodes_seen=760 last_return=-143.0 (+1 eps) [worker 0] episodes_seen=770 last_return=-117.6 (+1 eps) [worker 3] episodes_seen=780 last_return=-250.3 (+1 eps) [worker 1] episodes_seen=770 last_return=-273.4 (+1 eps) [worker 2] episodes_seen=770 last_return=-249.1 (+1 eps) [worker 0] episodes_seen=780 last_return=-175.4 (+1 eps) [worker 3] episodes_seen=790 last_return=-219.6 (+1 eps) [worker 1] episodes_seen=780 last_return=-153.1 (+1 eps) [worker 2] episodes_seen=780 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=790 last_return=-272.0 (+1 eps) [worker 3] episodes_seen=800 last_return=-131.0 (+1 eps) [worker 1] episodes_seen=790 last_return=-145.4 (+1 eps) [worker 0] episodes_seen=800 last_return=-118.2 (+1 eps) [worker 2] episodes_seen=790 last_return=-114.5 (+1 eps) [worker 3] episodes_seen=810 last_return=-238.6 (+1 eps) [worker 1] episodes_seen=800 last_return=-205.8 (+1 eps) [worker 0] episodes_seen=810 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=800 last_return=-193.9 (+1 eps) [worker 3] episodes_seen=820 last_return=-201.2 (+1 eps) [worker 1] episodes_seen=810 last_return=-238.1 (+1 eps) [worker 0] episodes_seen=820 last_return=-250.2 (+1 eps) [worker 2] episodes_seen=810 last_return=-113.8 (+1 eps) [worker 3] episodes_seen=830 last_return=-113.0 (+1 eps) [worker 1] episodes_seen=820 last_return=-251.0 (+1 eps) [worker 0] episodes_seen=830 last_return=-214.4 (+1 eps) [worker 3] episodes_seen=840 last_return=-314.7 (+1 eps) [worker 2] episodes_seen=820 last_return=-312.7 (+1 eps) [worker 1] episodes_seen=830 last_return=-181.8 (+1 eps) [worker 0] episodes_seen=840 last_return=-247.3 (+1 eps) [worker 3] episodes_seen=850 last_return=-178.9 (+1 eps) [worker 2] episodes_seen=830 last_return=-200.8 (+1 eps) [worker 1] 
episodes_seen=840 last_return=-166.5 (+1 eps) [worker 0] episodes_seen=850 last_return=-151.3 (+1 eps) [worker 3] episodes_seen=860 last_return=-257.3 (+1 eps) [worker 2] episodes_seen=840 last_return=-245.2 (+1 eps) [worker 1] episodes_seen=850 last_return=-139.3 (+1 eps) [worker 3] episodes_seen=870 last_return=-204.6 (+1 eps) [worker 0] episodes_seen=860 last_return=-241.5 (+1 eps) [worker 1] episodes_seen=860 last_return=-265.1 (+1 eps) [A2C][sync] it= 5251 steps= 420080 (+ 80) avg10=-180.02 loss=18007790.000 pg=-0.000 vf=32741434.000 H=0.000 gn=7712796.500 [worker 2] episodes_seen=850 last_return=-209.7 (+1 eps) [worker 3] episodes_seen=880 last_return=-234.4 (+1 eps) [worker 0] episodes_seen=870 last_return=-196.9 (+1 eps) [worker 1] episodes_seen=870 last_return=-143.1 (+1 eps) [worker 2] episodes_seen=860 last_return=-238.8 (+1 eps) [worker 0] episodes_seen=880 last_return=-218.4 (+1 eps) [worker 3] episodes_seen=890 last_return=-261.5 (+1 eps) [worker 2] episodes_seen=870 last_return=-156.0 (+1 eps) [worker 1] episodes_seen=880 last_return=-166.3 (+1 eps) [worker 0] episodes_seen=890 last_return=-158.7 (+1 eps) [worker 3] episodes_seen=900 last_return=-121.1 (+1 eps) [worker 2] episodes_seen=880 last_return=-192.5 (+1 eps) [worker 1] episodes_seen=890 last_return=-107.0 (+1 eps) [worker 0] episodes_seen=900 last_return=-228.4 (+1 eps) [worker 3] episodes_seen=910 last_return=-305.0 (+1 eps) [worker 1] episodes_seen=900 last_return=-226.7 (+1 eps) [worker 2] episodes_seen=890 last_return=-215.7 (+1 eps) [worker 0] episodes_seen=910 last_return=-134.0 (+1 eps) [worker 3] episodes_seen=920 last_return=-170.4 (+1 eps) [worker 1] episodes_seen=910 last_return=-143.5 (+1 eps) [worker 2] episodes_seen=900 last_return=-183.3 (+1 eps) [worker 0] episodes_seen=920 last_return=-137.4 (+1 eps) [worker 1] episodes_seen=920 last_return=-171.7 (+1 eps) [worker 3] episodes_seen=930 last_return=-185.0 (+1 eps) [worker 2] episodes_seen=910 last_return=-183.3 (+1 eps) 
[worker 0] episodes_seen=930 last_return=-240.6 (+1 eps) [worker 1] episodes_seen=930 last_return=-118.3 (+1 eps) [worker 2] episodes_seen=920 last_return=-171.1 (+1 eps) [worker 0] episodes_seen=940 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=940 last_return=-318.2 (+1 eps) [worker 3] episodes_seen=940 last_return=-220.1 (+1 eps) [worker 2] episodes_seen=930 last_return=-162.9 (+1 eps) [worker 1] episodes_seen=950 last_return=-242.4 (+1 eps) [worker 0] episodes_seen=950 last_return=-177.7 (+1 eps) [worker 3] episodes_seen=950 last_return=-106.5 (+1 eps) [worker 2] episodes_seen=940 last_return=-110.1 (+1 eps) [worker 0] episodes_seen=960 last_return=-240.8 (+1 eps) [worker 1] episodes_seen=960 last_return=-172.0 (+1 eps) [worker 3] episodes_seen=960 last_return=-212.1 (+1 eps) [worker 1] episodes_seen=970 last_return=-161.1 (+1 eps) [worker 2] episodes_seen=950 last_return=-157.2 (+1 eps) [worker 0] episodes_seen=970 last_return=-252.8 (+1 eps) [worker 3] episodes_seen=970 last_return=-180.0 (+1 eps) [worker 1] episodes_seen=980 last_return=-190.9 (+1 eps) [worker 2] episodes_seen=960 last_return=-118.8 (+1 eps) [worker 3] episodes_seen=980 last_return=-220.3 (+1 eps) [worker 0] episodes_seen=980 last_return=-353.4 (+1 eps) [worker 1] episodes_seen=990 last_return=-209.0 (+1 eps) [worker 2] episodes_seen=970 last_return=-141.1 (+1 eps) [worker 3] episodes_seen=990 last_return=-242.8 (+1 eps) [A2C][sync] it= 6001 steps= 480080 (+ 80) avg10=-210.11 loss=179980.891 pg=-0.000 vf=327237.969 H=0.000 gn=1033447.688 [worker 0] episodes_seen=990 last_return=-179.2 (+1 eps) [worker 1] episodes_seen=1000 last_return=-111.1 (+1 eps) [worker 2] episodes_seen=980 last_return=-185.5 (+1 eps) [worker 3] episodes_seen=1000 last_return=-186.3 (+1 eps) [worker 0] episodes_seen=1000 last_return=-121.5 (+1 eps) [worker 2] episodes_seen=990 last_return=-242.3 (+1 eps) [worker 1] episodes_seen=1010 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=1010 last_return=-230.6 (+1 
eps) [worker 0] episodes_seen=1010 last_return=-195.5 (+1 eps) [worker 2] episodes_seen=1000 last_return=-161.9 (+1 eps) [worker 1] episodes_seen=1020 last_return=-107.8 (+1 eps) [worker 3] episodes_seen=1020 last_return=-426.9 (+1 eps) [worker 0] episodes_seen=1020 last_return=-111.1 (+1 eps) [worker 2] episodes_seen=1010 last_return=-235.3 (+1 eps) [worker 1] episodes_seen=1030 last_return=-172.9 (+1 eps) [worker 2] episodes_seen=1020 last_return=-229.6 (+1 eps) [worker 0] episodes_seen=1030 last_return=-153.2 (+1 eps) [worker 3] episodes_seen=1030 last_return=-310.6 (+1 eps) [worker 1] episodes_seen=1040 last_return=-168.5 (+1 eps) [worker 0] episodes_seen=1040 last_return=-271.1 (+1 eps) [worker 2] episodes_seen=1030 last_return=-246.9 (+1 eps) [worker 3] episodes_seen=1040 last_return=-172.9 (+1 eps) [worker 1] episodes_seen=1050 last_return=-181.9 (+1 eps) [worker 0] episodes_seen=1050 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1040 last_return=-121.3 (+1 eps) [worker 3] episodes_seen=1050 last_return=-175.1 (+1 eps) [worker 1] episodes_seen=1060 last_return=-172.7 (+1 eps) [worker 0] episodes_seen=1060 last_return=-228.6 (+1 eps) [worker 2] episodes_seen=1050 last_return=-152.6 (+1 eps) [worker 3] episodes_seen=1060 last_return=-151.6 (+1 eps) [worker 1] episodes_seen=1070 last_return=-231.5 (+1 eps) [worker 0] episodes_seen=1070 last_return=-171.3 (+1 eps) [worker 2] episodes_seen=1060 last_return=-264.1 (+1 eps) [worker 1] episodes_seen=1080 last_return=-255.2 (+1 eps) [worker 3] episodes_seen=1070 last_return=-240.1 (+1 eps) [worker 0] episodes_seen=1080 last_return=-154.7 (+1 eps) [worker 2] episodes_seen=1070 last_return=-253.4 (+1 eps) [worker 3] episodes_seen=1080 last_return=-172.1 (+1 eps) [worker 1] episodes_seen=1090 last_return=-253.6 (+1 eps) [worker 0] episodes_seen=1090 last_return=-254.1 (+1 eps) [worker 3] episodes_seen=1090 last_return=-305.8 (+1 eps) [worker 2] episodes_seen=1080 last_return=-263.5 (+1 eps) [worker 1] 
episodes_seen=1100 last_return=-164.3 (+1 eps) [worker 0] episodes_seen=1100 last_return=-277.1 (+1 eps) [worker 2] episodes_seen=1090 last_return=-260.0 (+1 eps) [worker 3] episodes_seen=1100 last_return=-343.6 (+1 eps) [worker 1] episodes_seen=1110 last_return=-105.8 (+1 eps) [worker 0] episodes_seen=1110 last_return=-162.8 (+1 eps) [A2C][sync] it= 6751 steps= 540080 (+ 80) avg10=-188.84 loss=331225.438 pg=-0.000 vf=602228.062 H=0.000 gn=2523522.500 [worker 2] episodes_seen=1100 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=1110 last_return=-198.0 (+1 eps) [worker 0] episodes_seen=1120 last_return=-189.2 (+1 eps) [worker 1] episodes_seen=1120 last_return=-207.4 (+1 eps) [worker 2] episodes_seen=1110 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=1120 last_return=-180.3 (+1 eps) [worker 0] episodes_seen=1130 last_return=-183.3 (+1 eps) [worker 1] episodes_seen=1130 last_return=-113.7 (+1 eps) [worker 2] episodes_seen=1120 last_return=-288.7 (+1 eps) [worker 3] episodes_seen=1130 last_return=-131.6 (+1 eps) [worker 0] episodes_seen=1140 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1140 last_return=-242.3 (+1 eps) [worker 2] episodes_seen=1130 last_return=-254.5 (+1 eps) [worker 0] episodes_seen=1150 last_return=-138.6 (+1 eps) [worker 3] episodes_seen=1140 last_return=-170.5 (+1 eps) [worker 1] episodes_seen=1150 last_return=-167.7 (+1 eps) [worker 2] episodes_seen=1140 last_return=-214.0 (+1 eps) [worker 0] episodes_seen=1160 last_return=-182.6 (+1 eps) [worker 3] episodes_seen=1150 last_return=-137.7 (+1 eps) [worker 1] episodes_seen=1160 last_return=-267.3 (+1 eps) [worker 3] episodes_seen=1160 last_return=-108.1 (+1 eps) [worker 2] episodes_seen=1150 last_return=-183.4 (+1 eps) [worker 0] episodes_seen=1170 last_return=-208.0 (+1 eps) [worker 1] episodes_seen=1170 last_return=-256.8 (+1 eps) [worker 3] episodes_seen=1170 last_return=-145.5 (+1 eps) [worker 0] episodes_seen=1180 last_return=-251.3 (+1 eps) [worker 2] episodes_seen=1160 
last_return=-234.0 (+1 eps) [worker 1] episodes_seen=1180 last_return=-146.7 (+1 eps) [worker 3] episodes_seen=1180 last_return=-132.1 (+1 eps) [worker 2] episodes_seen=1170 last_return=-245.2 (+1 eps) [worker 1] episodes_seen=1190 last_return=-156.6 (+1 eps) [worker 0] episodes_seen=1190 last_return=-202.0 (+1 eps) [worker 3] episodes_seen=1190 last_return=-193.9 (+1 eps) [worker 0] episodes_seen=1200 last_return=-220.8 (+1 eps) [worker 2] episodes_seen=1180 last_return=-163.1 (+1 eps) [worker 1] episodes_seen=1200 last_return=-180.6 (+1 eps) [worker 3] episodes_seen=1200 last_return=-163.8 (+1 eps) [worker 0] episodes_seen=1210 last_return=-118.1 (+1 eps) [worker 1] episodes_seen=1210 last_return=-200.8 (+1 eps) [worker 2] episodes_seen=1190 last_return=-131.9 (+1 eps) [worker 0] episodes_seen=1220 last_return=-207.4 (+1 eps) [worker 1] episodes_seen=1220 last_return=-283.0 (+1 eps) [worker 3] episodes_seen=1210 last_return=-192.7 (+1 eps) [worker 2] episodes_seen=1200 last_return=-124.1 (+1 eps) [worker 3] episodes_seen=1220 last_return=-192.4 (+1 eps) [worker 2] episodes_seen=1210 last_return=-180.0 (+1 eps) [worker 0] episodes_seen=1230 last_return=-137.8 (+1 eps) [worker 1] episodes_seen=1230 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1220 last_return=-202.7 (+1 eps) [worker 3] episodes_seen=1230 last_return=-109.6 (+1 eps) [worker 0] episodes_seen=1240 last_return=-134.2 (+1 eps) [A2C][sync] it= 7501 steps= 600080 (+ 80) avg10=-179.94 loss=345920.031 pg=-0.000 vf=628945.500 H=0.000 gn=2153112.250 [worker 1] episodes_seen=1240 last_return=-174.3 (+1 eps) [worker 2] episodes_seen=1230 last_return=-178.1 (+1 eps) [worker 0] episodes_seen=1250 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=1240 last_return=-138.1 (+1 eps) [worker 1] episodes_seen=1250 last_return=-182.5 (+1 eps) [worker 2] episodes_seen=1240 last_return=-116.1 (+1 eps) [worker 0] episodes_seen=1260 last_return=-291.0 (+1 eps) [worker 1] episodes_seen=1260 last_return=-186.8 (+1 
eps) [worker 3] episodes_seen=1250 last_return=-246.7 (+1 eps) [worker 0] episodes_seen=1270 last_return=-246.6 (+1 eps) [worker 2] episodes_seen=1250 last_return=-134.2 (+1 eps) [worker 1] episodes_seen=1270 last_return=-123.5 (+1 eps) [worker 3] episodes_seen=1260 last_return=-166.3 (+1 eps) [worker 2] episodes_seen=1260 last_return=-212.9 (+1 eps) [worker 0] episodes_seen=1280 last_return=-243.4 (+1 eps) [worker 1] episodes_seen=1280 last_return=-105.6 (+1 eps) [worker 3] episodes_seen=1270 last_return=-189.9 (+1 eps) [worker 2] episodes_seen=1270 last_return=-184.7 (+1 eps) [worker 0] episodes_seen=1290 last_return=-185.0 (+1 eps) [worker 3] episodes_seen=1280 last_return=-129.0 (+1 eps) [worker 1] episodes_seen=1290 last_return=-114.9 (+1 eps) [worker 2] episodes_seen=1280 last_return=-149.1 (+1 eps) [worker 0] episodes_seen=1300 last_return=-202.0 (+1 eps) [worker 3] episodes_seen=1290 last_return=-301.0 (+1 eps) [worker 2] episodes_seen=1290 last_return=-275.6 (+1 eps) [worker 1] episodes_seen=1300 last_return=-173.9 (+1 eps) [worker 0] episodes_seen=1310 last_return=-159.0 (+1 eps) [worker 2] episodes_seen=1300 last_return=-111.2 (+1 eps) [worker 3] episodes_seen=1300 last_return=-217.4 (+1 eps) [worker 1] episodes_seen=1310 last_return=-154.1 (+1 eps) [worker 0] episodes_seen=1320 last_return=-150.3 (+1 eps) [worker 2] episodes_seen=1310 last_return=-253.3 (+1 eps) [worker 3] episodes_seen=1310 last_return=-165.2 (+1 eps) [worker 1] episodes_seen=1320 last_return=-251.3 (+1 eps) [worker 0] episodes_seen=1330 last_return=-125.2 (+1 eps) [worker 2] episodes_seen=1320 last_return=-223.6 (+1 eps) [worker 3] episodes_seen=1320 last_return=-256.1 (+1 eps) [worker 0] episodes_seen=1340 last_return=-108.6 (+1 eps) [worker 1] episodes_seen=1330 last_return=-225.5 (+1 eps) [worker 2] episodes_seen=1330 last_return=-123.7 (+1 eps) [worker 3] episodes_seen=1330 last_return=-300.0 (+1 eps) [worker 0] episodes_seen=1350 last_return=-262.3 (+1 eps) [worker 1] 
episodes_seen=1340 last_return=-253.8 (+1 eps) [worker 2] episodes_seen=1340 last_return=-163.2 (+1 eps) [worker 3] episodes_seen=1340 last_return=-179.0 (+1 eps) [worker 0] episodes_seen=1360 last_return=-305.1 (+1 eps) [worker 1] episodes_seen=1350 last_return=-226.0 (+1 eps) [A2C][sync] it= 8251 steps= 660080 (+ 80) avg10=-183.24 loss=42114880.000 pg=-0.000 vf=76572504.000 H=0.000 gn=17029988.000 [worker 3] episodes_seen=1350 last_return=-130.8 (+1 eps) [worker 2] episodes_seen=1350 last_return=-158.6 (+1 eps) [worker 0] episodes_seen=1370 last_return=-317.2 (+1 eps) [worker 1] episodes_seen=1360 last_return=-228.3 (+1 eps) [worker 3] episodes_seen=1360 last_return=-220.4 (+1 eps) [worker 0] episodes_seen=1380 last_return=-162.1 (+1 eps) [worker 2] episodes_seen=1360 last_return=-210.9 (+1 eps) [worker 1] episodes_seen=1370 last_return=-210.5 (+1 eps) [worker 3] episodes_seen=1370 last_return=-112.4 (+1 eps) [worker 2] episodes_seen=1370 last_return=-170.0 (+1 eps) [worker 0] episodes_seen=1390 last_return=-226.0 (+1 eps) [worker 1] episodes_seen=1380 last_return=-250.5 (+1 eps) [worker 0] episodes_seen=1400 last_return=-156.3 (+1 eps) [worker 3] episodes_seen=1380 last_return=-132.3 (+1 eps) [worker 2] episodes_seen=1380 last_return=-130.6 (+1 eps) [worker 1] episodes_seen=1390 last_return=-127.7 (+1 eps) [worker 0] episodes_seen=1410 last_return=-234.4 (+1 eps) [worker 3] episodes_seen=1390 last_return=-196.5 (+1 eps) [worker 2] episodes_seen=1390 last_return=-135.8 (+1 eps) [worker 1] episodes_seen=1400 last_return=-254.9 (+1 eps) [worker 0] episodes_seen=1420 last_return=-180.1 (+1 eps) [worker 3] episodes_seen=1400 last_return=-301.5 (+1 eps) [worker 2] episodes_seen=1400 last_return=-138.0 (+1 eps) [worker 1] episodes_seen=1410 last_return=-179.6 (+1 eps) [worker 0] episodes_seen=1430 last_return=-252.9 (+1 eps) [worker 1] episodes_seen=1420 last_return=-215.6 (+1 eps) [worker 2] episodes_seen=1410 last_return=-295.5 (+1 eps) [worker 3] episodes_seen=1410 
last_return=-194.0 (+1 eps) [worker 0] episodes_seen=1440 last_return=-181.1 (+1 eps) [worker 1] episodes_seen=1430 last_return=-105.3 (+1 eps) [worker 2] episodes_seen=1420 last_return=-292.2 (+1 eps) [worker 0] episodes_seen=1450 last_return=-246.1 (+1 eps) [worker 3] episodes_seen=1420 last_return=-200.9 (+1 eps) [worker 1] episodes_seen=1440 last_return=-117.9 (+1 eps) [worker 0] episodes_seen=1460 last_return=-215.9 (+1 eps) [worker 2] episodes_seen=1430 last_return=-149.9 (+1 eps) [worker 3] episodes_seen=1430 last_return=-288.0 (+1 eps) [worker 0] episodes_seen=1470 last_return=-173.5 (+1 eps) [worker 1] episodes_seen=1450 last_return=-197.9 (+1 eps) [worker 2] episodes_seen=1440 last_return=-251.8 (+1 eps) [worker 3] episodes_seen=1440 last_return=-110.2 (+1 eps) [worker 0] episodes_seen=1480 last_return=-116.8 (+1 eps) [worker 1] episodes_seen=1460 last_return=-111.0 (+1 eps) [worker 2] episodes_seen=1450 last_return=-132.3 (+1 eps) [worker 3] episodes_seen=1450 last_return=-127.8 (+1 eps) [worker 0] episodes_seen=1490 last_return=-147.0 (+1 eps) [worker 2] episodes_seen=1460 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1470 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=1460 last_return=-171.3 (+1 eps) [A2C][sync] it= 9001 steps= 720080 (+ 80) avg10=-176.39 loss=1464656.875 pg=-0.000 vf=2663012.500 H=0.000 gn=2858478.250 [worker 0] episodes_seen=1500 last_return=-127.4 (+1 eps) [worker 1] episodes_seen=1480 last_return=-131.6 (+1 eps) [worker 2] episodes_seen=1470 last_return=-157.9 (+1 eps) [worker 3] episodes_seen=1470 last_return=-212.6 (+1 eps) [worker 1] episodes_seen=1490 last_return=-266.4 (+1 eps) [worker 0] episodes_seen=1510 last_return=-138.5 (+1 eps) [worker 2] episodes_seen=1480 last_return=-152.8 (+1 eps) [worker 3] episodes_seen=1480 last_return=-355.1 (+1 eps) [worker 1] episodes_seen=1500 last_return=-139.9 (+1 eps) [worker 0] episodes_seen=1520 last_return=-168.3 (+1 eps) [worker 2] episodes_seen=1490 last_return=-219.5 (+1 
eps) [worker 0] episodes_seen=1530 last_return=-215.5 (+1 eps) [worker 3] episodes_seen=1490 last_return=-264.4 (+1 eps) [worker 1] episodes_seen=1510 last_return=-163.2 (+1 eps) [worker 2] episodes_seen=1500 last_return=-171.6 (+1 eps) [worker 0] episodes_seen=1540 last_return=-242.3 (+1 eps) [worker 3] episodes_seen=1500 last_return=-220.2 (+1 eps) [worker 1] episodes_seen=1520 last_return=-192.4 (+1 eps) [worker 2] episodes_seen=1510 last_return=-237.0 (+1 eps) [worker 0] episodes_seen=1550 last_return=-113.4 (+1 eps) [worker 3] episodes_seen=1510 last_return=-236.7 (+1 eps) [worker 2] episodes_seen=1520 last_return=-261.7 (+1 eps) [worker 1] episodes_seen=1530 last_return=-132.5 (+1 eps) [worker 0] episodes_seen=1560 last_return=-123.6 (+1 eps) [worker 2] episodes_seen=1530 last_return=-284.2 (+1 eps) [worker 3] episodes_seen=1520 last_return=-142.4 (+1 eps) [worker 0] episodes_seen=1570 last_return=-105.2 (+1 eps) [worker 1] episodes_seen=1540 last_return=-262.3 (+1 eps) [worker 2] episodes_seen=1540 last_return=-136.5 (+1 eps) [worker 1] episodes_seen=1550 last_return=-188.7 (+1 eps) [worker 3] episodes_seen=1530 last_return=-271.2 (+1 eps) [worker 0] episodes_seen=1580 last_return=-270.9 (+1 eps) [worker 2] episodes_seen=1550 last_return=-142.9 (+1 eps) [worker 1] episodes_seen=1560 last_return=-162.2 (+1 eps) [worker 0] episodes_seen=1590 last_return=-115.5 (+1 eps) [worker 3] episodes_seen=1540 last_return=-224.8 (+1 eps) [worker 2] episodes_seen=1560 last_return=-249.0 (+1 eps) [worker 1] episodes_seen=1570 last_return=-114.4 (+1 eps) [worker 0] episodes_seen=1600 last_return=-118.3 (+1 eps) [worker 3] episodes_seen=1550 last_return=-105.7 (+1 eps) [worker 2] episodes_seen=1570 last_return=-170.2 (+1 eps) [worker 1] episodes_seen=1580 last_return=-189.2 (+1 eps) [worker 0] episodes_seen=1610 last_return=-251.2 (+1 eps) [worker 3] episodes_seen=1560 last_return=-122.2 (+1 eps) [worker 1] episodes_seen=1590 last_return=-210.9 (+1 eps) [worker 2] 
episodes_seen=1580 last_return=-173.7 (+1 eps) [worker 0] episodes_seen=1620 last_return=-127.8 (+1 eps) [worker 3] episodes_seen=1570 last_return=-245.1 (+1 eps) [A2C][sync] it= 9751 steps= 780080 (+ 80) avg10=-187.47 loss=153055856.000 pg=-0.000 vf=278283360.000 H=0.000 gn=100395928.000 [worker 2] episodes_seen=1590 last_return=-150.9 (+1 eps) [worker 1] episodes_seen=1600 last_return=-182.7 (+1 eps) [worker 0] episodes_seen=1630 last_return=-228.8 (+1 eps) [worker 3] episodes_seen=1580 last_return=-248.2 (+1 eps) [worker 2] episodes_seen=1600 last_return=-256.0 (+1 eps) [worker 0] episodes_seen=1640 last_return=-121.0 (+1 eps) [worker 1] episodes_seen=1610 last_return=-126.6 (+1 eps) [worker 2] episodes_seen=1610 last_return=-136.6 (+1 eps) [worker 3] episodes_seen=1590 last_return=-126.3 (+1 eps) [worker 1] episodes_seen=1620 last_return=-162.0 (+1 eps) [worker 0] episodes_seen=1650 last_return=-151.1 (+1 eps) [worker 3] episodes_seen=1600 last_return=-259.0 (+1 eps) [worker 2] episodes_seen=1620 last_return=-136.1 (+1 eps) [worker 1] episodes_seen=1630 last_return=-246.6 (+1 eps) [worker 0] episodes_seen=1660 last_return=-230.4 (+1 eps) [worker 3] episodes_seen=1610 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1630 last_return=-122.0 (+1 eps) [worker 1] episodes_seen=1640 last_return=-111.4 (+1 eps) [worker 0] episodes_seen=1670 last_return=-98.9 (+1 eps) [worker 3] episodes_seen=1620 last_return=-184.4 (+1 eps) [worker 2] episodes_seen=1640 last_return=-243.1 (+1 eps) [worker 1] episodes_seen=1650 last_return=-266.2 (+1 eps) [worker 0] episodes_seen=1680 last_return=-221.1 (+1 eps) [worker 3] episodes_seen=1630 last_return=-186.4 (+1 eps) [worker 2] episodes_seen=1650 last_return=-226.5 (+1 eps) [worker 1] episodes_seen=1660 last_return=-178.8 (+1 eps) [worker 0] episodes_seen=1690 last_return=-199.1 (+1 eps) [worker 3] episodes_seen=1640 last_return=-127.0 (+1 eps) [worker 2] episodes_seen=1660 last_return=-148.1 (+1 eps) [worker 1] 
episodes_seen=1670 last_return=-219.5 (+1 eps) [worker 0] episodes_seen=1700 last_return=-230.5 (+1 eps) [worker 3] episodes_seen=1650 last_return=-118.3 (+1 eps) [worker 1] episodes_seen=1680 last_return=-255.1 (+1 eps) [worker 2] episodes_seen=1670 last_return=-201.6 (+1 eps) [worker 0] episodes_seen=1710 last_return=-175.3 (+1 eps) [worker 3] episodes_seen=1660 last_return=-209.3 (+1 eps) [worker 1] episodes_seen=1690 last_return=-205.9 (+1 eps) [worker 2] episodes_seen=1680 last_return=-205.1 (+1 eps) [worker 3] episodes_seen=1670 last_return=-139.7 (+1 eps) [worker 0] episodes_seen=1720 last_return=-265.1 (+1 eps) [worker 1] episodes_seen=1700 last_return=-118.8 (+1 eps) [worker 2] episodes_seen=1690 last_return=-119.6 (+1 eps) [worker 0] episodes_seen=1730 last_return=-209.5 (+1 eps) [worker 3] episodes_seen=1680 last_return=-99.1 (+1 eps) [worker 1] episodes_seen=1710 last_return=-193.3 (+1 eps) [worker 3] episodes_seen=1690 last_return=-195.2 (+1 eps) [worker 0] episodes_seen=1740 last_return=-194.3 (+1 eps) [worker 2] episodes_seen=1700 last_return=-120.9 (+1 eps) [A2C][sync] it=10501 steps= 840080 (+ 80) avg10=-180.35 loss=615400896.000 pg=-0.247 vf=1118910720.000 H=0.320 gn=180397376.000 [worker 0] episodes_seen=1750 last_return=-129.4 (+1 eps) [worker 1] episodes_seen=1720 last_return=-202.2 (+1 eps) [worker 3] episodes_seen=1700 last_return=-263.4 (+1 eps) [worker 2] episodes_seen=1710 last_return=-234.5 (+1 eps) [worker 1] episodes_seen=1730 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1760 last_return=-114.9 (+1 eps) [worker 2] episodes_seen=1720 last_return=-134.0 (+1 eps) [worker 3] episodes_seen=1710 last_return=-270.6 (+1 eps) [worker 1] episodes_seen=1740 last_return=-193.2 (+1 eps) [worker 2] episodes_seen=1730 last_return=-257.6 (+1 eps) [worker 3] episodes_seen=1720 last_return=-119.5 (+1 eps) [worker 0] episodes_seen=1770 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1740 last_return=-233.3 (+1 eps) [worker 1] 
episodes_seen=1750 last_return=-257.9 (+1 eps) [worker 3] episodes_seen=1730 last_return=-106.4 (+1 eps) [worker 0] episodes_seen=1780 last_return=-129.8 (+1 eps) [worker 2] episodes_seen=1750 last_return=-170.1 (+1 eps) [worker 3] episodes_seen=1740 last_return=-197.8 (+1 eps) [worker 1] episodes_seen=1760 last_return=-188.6 (+1 eps) [worker 0] episodes_seen=1790 last_return=-171.1 (+1 eps) [worker 3] episodes_seen=1750 last_return=-240.3 (+1 eps) [worker 0] episodes_seen=1800 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1760 last_return=-187.2 (+1 eps) [worker 1] episodes_seen=1770 last_return=-218.7 (+1 eps) [worker 3] episodes_seen=1760 last_return=-150.4 (+1 eps) [worker 0] episodes_seen=1810 last_return=-151.9 (+1 eps) [worker 2] episodes_seen=1770 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1780 last_return=-204.0 (+1 eps) [worker 3] episodes_seen=1770 last_return=-109.2 (+1 eps) [worker 0] episodes_seen=1820 last_return=-143.4 (+1 eps) [worker 1] episodes_seen=1790 last_return=-258.2 (+1 eps) [worker 3] episodes_seen=1780 last_return=-214.5 (+1 eps) [worker 2] episodes_seen=1780 last_return=-116.9 (+1 eps) [worker 1] episodes_seen=1800 last_return=-226.8 (+1 eps) [worker 2] episodes_seen=1790 last_return=-205.4 (+1 eps) [worker 3] episodes_seen=1790 last_return=-249.7 (+1 eps) [worker 0] episodes_seen=1830 last_return=-193.6 (+1 eps) [worker 2] episodes_seen=1800 last_return=-178.4 (+1 eps) [worker 3] episodes_seen=1800 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1810 last_return=-249.4 (+1 eps) [worker 0] episodes_seen=1840 last_return=-163.9 (+1 eps) [worker 3] episodes_seen=1810 last_return=-219.7 (+1 eps) [worker 2] episodes_seen=1810 last_return=-166.1 (+1 eps) [worker 1] episodes_seen=1820 last_return=-205.2 (+1 eps) [worker 0] episodes_seen=1850 last_return=-253.6 (+1 eps) [worker 3] episodes_seen=1820 last_return=-139.4 (+1 eps) [worker 2] episodes_seen=1820 last_return=-176.3 (+1 eps) [A2C][sync] it=11251 steps= 900080 (+ 
80) avg10=-189.38 loss=199844768.000 pg=-0.000 vf=363354112.000 H=0.000 gn=49753680.000 [worker 0] episodes_seen=1860 last_return=-210.0 (+1 eps) [worker 1] episodes_seen=1830 last_return=-121.3 (+1 eps) [worker 2] episodes_seen=1830 last_return=-178.6 (+1 eps) [worker 3] episodes_seen=1830 last_return=-126.0 (+1 eps) [worker 0] episodes_seen=1870 last_return=-218.7 (+1 eps) [worker 1] episodes_seen=1840 last_return=-244.9 (+1 eps) [worker 3] episodes_seen=1840 last_return=-204.7 (+1 eps) [worker 2] episodes_seen=1840 last_return=-217.2 (+1 eps) [worker 0] episodes_seen=1880 last_return=-235.5 (+1 eps) [worker 1] episodes_seen=1850 last_return=-187.8 (+1 eps) [worker 2] episodes_seen=1850 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=1850 last_return=-163.8 (+1 eps) [worker 1] episodes_seen=1860 last_return=-179.2 (+1 eps) [worker 0] episodes_seen=1890 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1860 last_return=-184.4 (+1 eps) [worker 3] episodes_seen=1860 last_return=-150.0 (+1 eps) [worker 1] episodes_seen=1870 last_return=-222.3 (+1 eps) [worker 0] episodes_seen=1900 last_return=-273.8 (+1 eps) [worker 2] episodes_seen=1870 last_return=-289.1 (+1 eps) [worker 3] episodes_seen=1870 last_return=-142.1 (+1 eps) [worker 0] episodes_seen=1910 last_return=-217.6 (+1 eps) [worker 1] episodes_seen=1880 last_return=-180.1 (+1 eps) [worker 2] episodes_seen=1880 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1920 last_return=-223.5 (+1 eps) [worker 1] episodes_seen=1890 last_return=-241.4 (+1 eps) [worker 3] episodes_seen=1880 last_return=-123.2 (+1 eps) [worker 2] episodes_seen=1890 last_return=-219.2 (+1 eps) [worker 3] episodes_seen=1890 last_return=-135.4 (+1 eps) [worker 1] episodes_seen=1900 last_return=-187.5 (+1 eps) [worker 0] episodes_seen=1930 last_return=-328.7 (+1 eps) [worker 2] episodes_seen=1900 last_return=-114.9 (+1 eps) [worker 3] episodes_seen=1900 last_return=-193.5 (+1 eps) [worker 2] episodes_seen=1910 last_return=-238.0 (+1 
eps) [worker 0] episodes_seen=1940 last_return=-118.7 (+1 eps) [worker 1] episodes_seen=1910 last_return=-108.7 (+1 eps) [worker 3] episodes_seen=1910 last_return=-199.2 (+1 eps) [worker 2] episodes_seen=1920 last_return=-295.5 (+1 eps) [worker 0] episodes_seen=1950 last_return=-242.5 (+1 eps) [worker 1] episodes_seen=1920 last_return=-163.8 (+1 eps) [worker 2] episodes_seen=1930 last_return=-117.3 (+1 eps) [worker 3] episodes_seen=1920 last_return=-122.6 (+1 eps) [worker 0] episodes_seen=1960 last_return=-91.5 (+1 eps) [worker 1] episodes_seen=1930 last_return=-161.8 (+1 eps) [worker 2] episodes_seen=1940 last_return=-218.0 (+1 eps) [worker 3] episodes_seen=1930 last_return=-166.8 (+1 eps) [worker 0] episodes_seen=1970 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=1940 last_return=-232.4 (+1 eps) [worker 2] episodes_seen=1950 last_return=-137.5 (+1 eps) [A2C][sync] it=12001 steps= 960080 (+ 80) avg10=-187.78 loss=592917376.000 pg=-0.000 vf=1078031616.000 H=0.000 gn=199407360.000 [worker 1] episodes_seen=1940 last_return=-125.7 (+1 eps) [worker 0] episodes_seen=1980 last_return=-260.3 (+1 eps) [worker 3] episodes_seen=1950 last_return=-173.4 (+1 eps) [worker 1] episodes_seen=1950 last_return=-212.9 (+1 eps) [worker 2] episodes_seen=1960 last_return=-187.2 (+1 eps) [worker 0] episodes_seen=1990 last_return=-152.4 (+1 eps) [worker 3] episodes_seen=1960 last_return=-111.5 (+1 eps) [worker 2] episodes_seen=1970 last_return=-159.5 (+1 eps) [worker 1] episodes_seen=1960 last_return=-332.0 (+1 eps) [worker 0] episodes_seen=2000 last_return=-105.8 (+1 eps) [worker 3] episodes_seen=1970 last_return=-165.6 (+1 eps) [worker 2] episodes_seen=1980 last_return=-120.7 (+1 eps) [worker 1] episodes_seen=1970 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=1980 last_return=-112.6 (+1 eps) [worker 2] episodes_seen=1990 last_return=-118.3 (+1 eps) [worker 0] episodes_seen=2010 last_return=-266.8 (+1 eps) [worker 1] episodes_seen=1980 last_return=-180.2 (+1 eps) [worker 2] 
episodes_seen=2000 last_return=-214.5 (+1 eps) [worker 0] episodes_seen=2020 last_return=-141.8 (+1 eps) [worker 1] episodes_seen=1990 last_return=-132.7 (+1 eps) [worker 3] episodes_seen=1990 last_return=-254.9 (+1 eps) [worker 2] episodes_seen=2010 last_return=-239.8 (+1 eps) [worker 1] episodes_seen=2000 last_return=-250.5 (+1 eps) [worker 3] episodes_seen=2000 last_return=-186.8 (+1 eps) [worker 0] episodes_seen=2030 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=2020 last_return=-182.4 (+1 eps) [worker 1] episodes_seen=2010 last_return=-260.2 (+1 eps) [worker 0] episodes_seen=2040 last_return=-119.6 (+1 eps) [worker 3] episodes_seen=2010 last_return=-109.4 (+1 eps) [worker 2] episodes_seen=2030 last_return=-248.4 (+1 eps) [worker 3] episodes_seen=2020 last_return=-109.2 (+1 eps) [worker 0] episodes_seen=2050 last_return=-127.9 (+1 eps) [worker 1] episodes_seen=2020 last_return=-140.6 (+1 eps) [worker 2] episodes_seen=2040 last_return=-131.6 (+1 eps) [worker 0] episodes_seen=2060 last_return=-118.1 (+1 eps) [worker 3] episodes_seen=2030 last_return=-226.1 (+1 eps) [worker 1] episodes_seen=2030 last_return=-260.2 (+1 eps) [worker 2] episodes_seen=2050 last_return=-138.3 (+1 eps) [worker 0] episodes_seen=2070 last_return=-112.0 (+1 eps) [worker 3] episodes_seen=2040 last_return=-119.7 (+1 eps) [worker 1] episodes_seen=2040 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=2060 last_return=-377.8 (+1 eps) [worker 3] episodes_seen=2050 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2080 last_return=-309.8 (+1 eps) [worker 1] episodes_seen=2050 last_return=-257.0 (+1 eps) [worker 2] episodes_seen=2070 last_return=-198.5 (+1 eps) [A2C][sync] it=12751 steps= 1020080 (+ 80) avg10=-134.78 loss=224604.484 pg=-0.097 vf=408371.938 H=0.174 gn=1107622.500 [worker 0] episodes_seen=2090 last_return=-124.1 (+1 eps) [worker 3] episodes_seen=2060 last_return=-124.2 (+1 eps) [worker 2] episodes_seen=2080 last_return=-202.4 (+1 eps) [worker 1] episodes_seen=2060 
last_return=-112.8 (+1 eps) [worker 0] episodes_seen=2100 last_return=-271.6 (+1 eps) [worker 3] episodes_seen=2070 last_return=-106.0 (+1 eps) [worker 2] episodes_seen=2090 last_return=-208.5 (+1 eps) [worker 1] episodes_seen=2070 last_return=-149.0 (+1 eps) [worker 3] episodes_seen=2080 last_return=-173.3 (+1 eps) [worker 0] episodes_seen=2110 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=2100 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2080 last_return=-109.4 (+1 eps) [worker 0] episodes_seen=2120 last_return=-154.6 (+1 eps) [worker 3] episodes_seen=2090 last_return=-231.1 (+1 eps) [worker 2] episodes_seen=2110 last_return=-111.3 (+1 eps) [worker 1] episodes_seen=2090 last_return=-105.7 (+1 eps) [worker 3] episodes_seen=2100 last_return=-203.1 (+1 eps) [worker 0] episodes_seen=2130 last_return=-213.4 (+1 eps) [worker 1] episodes_seen=2100 last_return=-131.1 (+1 eps) [worker 2] episodes_seen=2120 last_return=-161.7 (+1 eps) [worker 0] episodes_seen=2140 last_return=-242.0 (+1 eps) [worker 3] episodes_seen=2110 last_return=-274.7 (+1 eps) [worker 1] episodes_seen=2110 last_return=-147.0 (+1 eps) [worker 2] episodes_seen=2130 last_return=-160.6 (+1 eps) [worker 0] episodes_seen=2150 last_return=-283.5 (+1 eps) [worker 3] episodes_seen=2120 last_return=-130.4 (+1 eps) [worker 2] episodes_seen=2140 last_return=-112.8 (+1 eps) [worker 1] episodes_seen=2120 last_return=-158.4 (+1 eps) [worker 0] episodes_seen=2160 last_return=-156.8 (+1 eps) [worker 3] episodes_seen=2130 last_return=-124.8 (+1 eps) [worker 2] episodes_seen=2150 last_return=-247.7 (+1 eps) [worker 1] episodes_seen=2130 last_return=-122.3 (+1 eps) [worker 0] episodes_seen=2170 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=2140 last_return=-186.5 (+1 eps) [worker 2] episodes_seen=2160 last_return=-176.2 (+1 eps) [worker 1] episodes_seen=2140 last_return=-166.2 (+1 eps) [worker 0] episodes_seen=2180 last_return=-146.3 (+1 eps) [worker 3] episodes_seen=2150 last_return=-202.4 (+1 eps) 
[worker 2] episodes_seen=2170 last_return=-261.7 (+1 eps) [worker 1] episodes_seen=2150 last_return=-170.3 (+1 eps) [worker 0] episodes_seen=2190 last_return=-139.4 (+1 eps) [worker 3] episodes_seen=2160 last_return=-194.0 (+1 eps) [worker 2] episodes_seen=2180 last_return=-277.8 (+1 eps) [worker 1] episodes_seen=2160 last_return=-125.8 (+1 eps) [worker 0] episodes_seen=2200 last_return=-106.3 (+1 eps) [worker 3] episodes_seen=2170 last_return=-137.7 (+1 eps) [A2C][sync] it=13501 steps= 1080080 (+ 80) avg10=-157.61 loss=24040766.000 pg=-0.000 vf=43710484.000 H=0.000 gn=70421472.000 [worker 1] episodes_seen=2170 last_return=-167.6 (+1 eps) [worker 2] episodes_seen=2190 last_return=-263.1 (+1 eps) [worker 0] episodes_seen=2210 last_return=-132.1 (+1 eps) [worker 3] episodes_seen=2180 last_return=-152.4 (+1 eps) [worker 1] episodes_seen=2180 last_return=-123.3 (+1 eps) [worker 2] episodes_seen=2200 last_return=-191.2 (+1 eps) [worker 0] episodes_seen=2220 last_return=-230.6 (+1 eps) [worker 3] episodes_seen=2190 last_return=-276.7 (+1 eps) [worker 1] episodes_seen=2190 last_return=-89.1 (+1 eps) [worker 2] episodes_seen=2210 last_return=-102.0 (+1 eps) [worker 0] episodes_seen=2230 last_return=-238.8 (+1 eps) [worker 1] episodes_seen=2200 last_return=-317.5 (+1 eps) [worker 3] episodes_seen=2200 last_return=-279.8 (+1 eps) [worker 2] episodes_seen=2220 last_return=-196.5 (+1 eps) [worker 0] episodes_seen=2240 last_return=-217.8 (+1 eps) [worker 1] episodes_seen=2210 last_return=-248.1 (+1 eps) [worker 3] episodes_seen=2210 last_return=-238.5 (+1 eps) [worker 0] episodes_seen=2250 last_return=-241.0 (+1 eps) [worker 2] episodes_seen=2230 last_return=-310.0 (+1 eps) [worker 1] episodes_seen=2220 last_return=-185.4 (+1 eps) [worker 3] episodes_seen=2220 last_return=-120.4 (+1 eps) [worker 2] episodes_seen=2240 last_return=-143.6 (+1 eps) [worker 0] episodes_seen=2260 last_return=-184.6 (+1 eps) [worker 1] episodes_seen=2230 last_return=-135.0 (+1 eps) [worker 3] 
episodes_seen=2230 last_return=-127.5 (+1 eps) [worker 0] episodes_seen=2270 last_return=-163.9 (+1 eps) [worker 2] episodes_seen=2250 last_return=-173.4 (+1 eps) [worker 1] episodes_seen=2240 last_return=-138.9 (+1 eps) [worker 3] episodes_seen=2240 last_return=-132.2 (+1 eps) [worker 0] episodes_seen=2280 last_return=-242.4 (+1 eps) [worker 2] episodes_seen=2260 last_return=-159.5 (+1 eps) [worker 1] episodes_seen=2250 last_return=-149.1 (+1 eps) [worker 3] episodes_seen=2250 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2290 last_return=-224.3 (+1 eps) [worker 2] episodes_seen=2270 last_return=-160.8 (+1 eps) [worker 1] episodes_seen=2260 last_return=-157.4 (+1 eps) [worker 3] episodes_seen=2260 last_return=-197.1 (+1 eps) [worker 2] episodes_seen=2280 last_return=-140.1 (+1 eps) [worker 1] episodes_seen=2270 last_return=-107.8 (+1 eps) [worker 0] episodes_seen=2300 last_return=-193.0 (+1 eps) [worker 3] episodes_seen=2270 last_return=-184.6 (+1 eps) [worker 2] episodes_seen=2290 last_return=-153.2 (+1 eps) [worker 1] episodes_seen=2280 last_return=-153.4 (+1 eps) [worker 0] episodes_seen=2310 last_return=-160.7 (+1 eps) [worker 3] episodes_seen=2280 last_return=-179.5 (+1 eps) [worker 1] episodes_seen=2290 last_return=-158.3 (+1 eps) [worker 2] episodes_seen=2300 last_return=-189.9 (+1 eps) [worker 0] episodes_seen=2320 last_return=-130.4 (+1 eps) [A2C][sync] it=14251 steps= 1140080 (+ 80) avg10=-188.25 loss=11325272.000 pg=-0.000 vf=20591404.000 H=0.000 gn=16137360.000 [worker 3] episodes_seen=2290 last_return=-117.1 (+1 eps) [worker 1] episodes_seen=2300 last_return=-111.5 (+1 eps) [worker 0] episodes_seen=2330 last_return=-208.7 (+1 eps) [worker 3] episodes_seen=2300 last_return=-234.7 (+1 eps) [worker 2] episodes_seen=2310 last_return=-178.7 (+1 eps) [worker 1] episodes_seen=2310 last_return=-227.1 (+1 eps) [worker 3] episodes_seen=2310 last_return=-159.1 (+1 eps) [worker 0] episodes_seen=2340 last_return=-208.3 (+1 eps) [worker 2] episodes_seen=2320 
last_return=-242.2 (+1 eps) [worker 1] episodes_seen=2320 last_return=-105.5 (+1 eps) [worker 3] episodes_seen=2320 last_return=-180.5 (+1 eps) [worker 0] episodes_seen=2350 last_return=-169.5 (+1 eps) [worker 2] episodes_seen=2330 last_return=-217.3 (+1 eps) [worker 1] episodes_seen=2330 last_return=-163.9 (+1 eps) [worker 3] episodes_seen=2330 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=2340 last_return=-105.5 (+1 eps) [worker 1] episodes_seen=2340 last_return=-125.5 (+1 eps) [worker 0] episodes_seen=2360 last_return=-125.0 (+1 eps) [worker 2] episodes_seen=2350 last_return=-163.7 (+1 eps) [worker 3] episodes_seen=2340 last_return=-315.0 (+1 eps) [worker 0] episodes_seen=2370 last_return=-201.4 (+1 eps) [worker 1] episodes_seen=2350 last_return=-144.4 (+1 eps) [worker 3] episodes_seen=2350 last_return=-259.3 (+1 eps) [worker 2] episodes_seen=2360 last_return=-389.7 (+1 eps) [worker 0] episodes_seen=2380 last_return=-99.7 (+1 eps) [worker 1] episodes_seen=2360 last_return=-107.7 (+1 eps) [worker 3] episodes_seen=2360 last_return=-126.3 (+1 eps) [worker 2] episodes_seen=2370 last_return=-111.6 (+1 eps) [worker 1] episodes_seen=2370 last_return=-214.2 (+1 eps) [worker 0] episodes_seen=2390 last_return=-256.9 (+1 eps) [worker 3] episodes_seen=2370 last_return=-112.5 (+1 eps) [worker 2] episodes_seen=2380 last_return=-224.3 (+1 eps) [worker 1] episodes_seen=2380 last_return=-170.6 (+1 eps) [worker 0] episodes_seen=2400 last_return=-153.3 (+1 eps) [worker 3] episodes_seen=2380 last_return=-226.3 (+1 eps) [worker 1] episodes_seen=2390 last_return=-106.6 (+1 eps) [worker 2] episodes_seen=2390 last_return=-134.5 (+1 eps) [worker 0] episodes_seen=2410 last_return=-135.2 (+1 eps) [worker 3] episodes_seen=2390 last_return=-205.3 (+1 eps) [worker 2] episodes_seen=2400 last_return=-194.6 (+1 eps) [worker 1] episodes_seen=2400 last_return=-235.2 (+1 eps) [worker 0] episodes_seen=2420 last_return=-188.6 (+1 eps) [worker 3] episodes_seen=2400 last_return=-122.2 (+1 eps) 
[worker 1] episodes_seen=2410 last_return=-174.8 (+1 eps) [worker 0] episodes_seen=2430 last_return=-165.9 (+1 eps) [worker 2] episodes_seen=2410 last_return=-261.5 (+1 eps) [worker 3] episodes_seen=2410 last_return=-263.3 (+1 eps) [A2C][sync] it=15000 steps= 1200000 (+ 80) avg10=-178.32 loss=55201720.000 pg=0.185 vf=100366760.000 H=0.248 gn=148623728.000 [Checkpoint] Saved self-describing checkpoint → part2_artifacts/checkpoints/a2c_run16_seed1227.pth [A2C][sync] done: steps=1200000 time=1148.8s avg10=-178.32
[Run run16_seed1227] checkpoint: part2_artifacts/checkpoints/a2c_run16_seed1227.pth [Run run16_seed1227] training plot (tail 500): part2_artifacts/train_curve_run16_seed1227.png [Run run16_seed1227] training plot (full): part2_artifacts/train_curve_full_run16_seed1227.png [Run run16_seed1227] per-worker plot: part2_artifacts/train_curve_workers_run16_seed1227.png [Run run16_seed1227] workers-average plot: part2_artifacts/train_curve_workers_avg_run16_seed1227.png
[Eval run16_seed1227] mean=-693.82 std=113.17 min=-927.28 max=-562.23 [Eval run16_seed1227] CSV: part2_artifacts/eval10_run16_seed1227.csv [Eval run16_seed1227] plot: part2_artifacts/eval10_run16_seed1227.png [Best] ep=6 return=-562.23 seed=1233
/usr/local/lib/python3.12/dist-packages/gymnasium/wrappers/rendering.py:293: UserWarning: WARN: Overwriting existing videos at /content/part2_artifacts/videos/run16_seed1227 folder (try specifying a different `video_folder` for the `RecordVideo` wrapper if this is not desired)
logger.warn(
[Video run16_seed1227] episode return=-562.23 [Video run16_seed1227] saved under: part2_artifacts/videos run16_seed1227 | mean=-693.8±113.2 | best_ep=6, best_ret=-562.2
Run#17
# Run 17: the run identifier embeds the global SEED and is handed to every
# helper below so they all refer to the same run.
run_id = f"run17_seed{SEED}"

# Hyperparameters for this run, forwarded unchanged to train_once as keyword
# arguments. NOTE(review): T looks like the per-worker rollout length (the
# training log reports +60 steps per iteration with 2 workers) — confirm
# against train_once.
_run17_hparams = dict(
    n_workers=2,
    total_env_steps=1_200_000,
    T=30,
    gamma=0.99,
    entropy_coef=0.010,
    value_coef=0.65,
    max_grad_norm=0.5,
    lr=2.5e-4,
    log_every=60_000,
)
model, logs, paths = train_once(run_id=run_id, **_run17_hparams)

# Both post-training helpers read the checkpoint written by train_once.
ckpt_path = paths.ckpt_path
metrics, _ = evaluate_10(run_id, ckpt_path)
video_dir, best_idx, best_ret = record_best_from_eval(run_id, ckpt_path)

# One-line summary: evaluation mean ± std plus the best episode index/return.
summary = f"{run_id} | mean={metrics['mean']:.1f}±{metrics['std']:.1f} | best_ep={best_idx}, best_ret={best_ret:.1f}"
print(summary)
[Run run17_seed1227] starting training… [A2C][sync] start: workers=2, T=30, target_steps=1200000, mp=fork [A2C][sync] it= 1 steps= 60 (+ 60) avg10= nan loss=229.699 pg=-0.000 vf=353.405 H=1.386 gn=55.463 [worker 0] episodes_seen=10 last_return=-105.5 (+1 eps) [worker 1] episodes_seen=10 last_return=-154.6 (+1 eps) [worker 0] episodes_seen=20 last_return=-108.1 (+1 eps) [worker 1] episodes_seen=20 last_return=-107.8 (+1 eps) [worker 0] episodes_seen=30 last_return=-182.4 (+1 eps) [worker 1] episodes_seen=30 last_return=-64.5 (+1 eps) [worker 0] episodes_seen=40 last_return=-169.9 (+1 eps) [worker 1] episodes_seen=40 last_return=-274.2 (+1 eps) [worker 0] episodes_seen=50 last_return=-222.8 (+1 eps) [worker 1] episodes_seen=50 last_return=-394.3 (+1 eps) [worker 0] episodes_seen=60 last_return=-261.5 (+1 eps) [worker 1] episodes_seen=60 last_return=-153.3 (+1 eps) [worker 0] episodes_seen=70 last_return=-315.1 (+1 eps) [worker 1] episodes_seen=70 last_return=-299.8 (+1 eps) [worker 0] episodes_seen=80 last_return=-281.6 (+1 eps) [worker 1] episodes_seen=80 last_return=-181.1 (+1 eps) [worker 0] episodes_seen=90 last_return=-228.9 (+1 eps) [worker 1] episodes_seen=90 last_return=-334.5 (+1 eps) [worker 0] episodes_seen=100 last_return=-137.4 (+1 eps) [worker 1] episodes_seen=100 last_return=-133.3 (+1 eps) [worker 0] episodes_seen=110 last_return=-165.9 (+1 eps) [worker 1] episodes_seen=110 last_return=-211.8 (+1 eps) [worker 0] episodes_seen=120 last_return=-124.6 (+1 eps) [worker 1] episodes_seen=120 last_return=-122.7 (+1 eps) [worker 0] episodes_seen=130 last_return=-169.5 (+1 eps) [worker 1] episodes_seen=130 last_return=-115.6 (+1 eps) [worker 0] episodes_seen=140 last_return=-167.8 (+1 eps) [worker 0] episodes_seen=150 last_return=-301.1 (+1 eps) [worker 1] episodes_seen=140 last_return=-260.7 (+1 eps) [worker 1] episodes_seen=150 last_return=-144.9 (+1 eps) [worker 0] episodes_seen=160 last_return=-264.8 (+1 eps) [worker 1] episodes_seen=160 last_return=-100.0 
(+1 eps) [worker 0] episodes_seen=170 last_return=-238.4 (+1 eps) [worker 1] episodes_seen=170 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=180 last_return=-191.7 (+1 eps) [worker 0] episodes_seen=190 last_return=-195.1 (+1 eps) [worker 1] episodes_seen=180 last_return=-321.2 (+1 eps) [worker 0] episodes_seen=200 last_return=-114.6 (+1 eps) [worker 1] episodes_seen=190 last_return=-274.4 (+1 eps) [worker 1] episodes_seen=200 last_return=-160.2 (+1 eps) [worker 0] episodes_seen=210 last_return=-136.0 (+1 eps) [worker 0] episodes_seen=220 last_return=-285.3 (+1 eps) [worker 1] episodes_seen=210 last_return=-367.7 (+1 eps) [worker 0] episodes_seen=230 last_return=-294.2 (+1 eps) [worker 1] episodes_seen=220 last_return=-307.0 (+1 eps) [worker 0] episodes_seen=240 last_return=-311.2 (+1 eps) [worker 1] episodes_seen=230 last_return=-233.3 (+1 eps) [A2C][sync] it= 1001 steps= 60060 (+ 60) avg10=-231.02 loss=9145.278 pg=-0.000 vf=14069.660 H=0.000 gn=40504.879 [worker 0] episodes_seen=250 last_return=-210.6 (+1 eps) [worker 1] episodes_seen=240 last_return=-137.8 (+1 eps) [worker 1] episodes_seen=250 last_return=-120.5 (+1 eps) [worker 0] episodes_seen=260 last_return=-327.5 (+1 eps) [worker 1] episodes_seen=260 last_return=-208.2 (+1 eps) [worker 0] episodes_seen=270 last_return=-127.1 (+1 eps) [worker 1] episodes_seen=270 last_return=-372.0 (+1 eps) [worker 0] episodes_seen=280 last_return=-179.0 (+1 eps) [worker 1] episodes_seen=280 last_return=-247.0 (+1 eps) [worker 0] episodes_seen=290 last_return=-151.0 (+1 eps) [worker 1] episodes_seen=290 last_return=-336.9 (+1 eps) [worker 0] episodes_seen=300 last_return=-109.0 (+1 eps) [worker 1] episodes_seen=300 last_return=-171.9 (+1 eps) [worker 0] episodes_seen=310 last_return=-247.3 (+1 eps) [worker 1] episodes_seen=310 last_return=-120.7 (+1 eps) [worker 0] episodes_seen=320 last_return=-293.6 (+1 eps) [worker 1] episodes_seen=320 last_return=-158.4 (+1 eps) [worker 0] episodes_seen=330 last_return=-373.6 (+1 
eps) [worker 1] episodes_seen=330 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=340 last_return=-245.4 (+1 eps) [worker 1] episodes_seen=340 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=350 last_return=-157.2 (+1 eps) [worker 1] episodes_seen=350 last_return=-139.0 (+1 eps) [worker 1] episodes_seen=360 last_return=-210.2 (+1 eps) [worker 0] episodes_seen=360 last_return=-217.2 (+1 eps) [worker 1] episodes_seen=370 last_return=-123.3 (+1 eps) [worker 0] episodes_seen=370 last_return=-274.7 (+1 eps) [worker 1] episodes_seen=380 last_return=-128.0 (+1 eps) [worker 0] episodes_seen=380 last_return=-244.9 (+1 eps) [worker 1] episodes_seen=390 last_return=-276.9 (+1 eps) [worker 0] episodes_seen=390 last_return=-316.5 (+1 eps) [worker 1] episodes_seen=400 last_return=-247.5 (+1 eps) [worker 0] episodes_seen=400 last_return=-161.4 (+1 eps) [worker 1] episodes_seen=410 last_return=-362.5 (+1 eps) [worker 0] episodes_seen=410 last_return=-348.0 (+1 eps) [worker 1] episodes_seen=420 last_return=-259.7 (+1 eps) [worker 0] episodes_seen=420 last_return=-225.8 (+1 eps) [worker 1] episodes_seen=430 last_return=-238.0 (+1 eps) [worker 0] episodes_seen=430 last_return=-287.3 (+1 eps) [worker 1] episodes_seen=440 last_return=-172.3 (+1 eps) [worker 0] episodes_seen=440 last_return=-179.4 (+1 eps) [worker 1] episodes_seen=450 last_return=-179.3 (+1 eps) [worker 0] episodes_seen=450 last_return=-294.4 (+1 eps) [worker 1] episodes_seen=460 last_return=-264.5 (+1 eps) [worker 0] episodes_seen=460 last_return=-319.7 (+1 eps) [worker 1] episodes_seen=470 last_return=-153.5 (+1 eps) [worker 0] episodes_seen=470 last_return=-298.1 (+1 eps) [worker 1] episodes_seen=480 last_return=-303.6 (+1 eps) [worker 0] episodes_seen=480 last_return=-145.2 (+1 eps) [worker 1] episodes_seen=490 last_return=-136.0 (+1 eps) [A2C][sync] it= 2001 steps= 120060 (+ 60) avg10=-212.81 loss=413193.938 pg=-0.000 vf=635683.000 H=0.000 gn=372374.781 [worker 0] episodes_seen=490 last_return=-215.1 (+1 
eps) [worker 1] episodes_seen=500 last_return=-179.8 (+1 eps) [worker 0] episodes_seen=500 last_return=-291.9 (+1 eps) [worker 1] episodes_seen=510 last_return=-186.3 (+1 eps) [worker 0] episodes_seen=510 last_return=-136.2 (+1 eps) [worker 1] episodes_seen=520 last_return=-334.6 (+1 eps) [worker 0] episodes_seen=520 last_return=-296.7 (+1 eps) [worker 1] episodes_seen=530 last_return=-265.7 (+1 eps) [worker 0] episodes_seen=530 last_return=-254.8 (+1 eps) [worker 1] episodes_seen=540 last_return=-167.5 (+1 eps) [worker 0] episodes_seen=540 last_return=-228.6 (+1 eps) [worker 1] episodes_seen=550 last_return=-275.3 (+1 eps) [worker 0] episodes_seen=550 last_return=-350.4 (+1 eps) [worker 1] episodes_seen=560 last_return=-229.8 (+1 eps) [worker 0] episodes_seen=560 last_return=-313.1 (+1 eps) [worker 1] episodes_seen=570 last_return=-273.5 (+1 eps) [worker 0] episodes_seen=570 last_return=-303.6 (+1 eps) [worker 1] episodes_seen=580 last_return=-150.3 (+1 eps) [worker 0] episodes_seen=580 last_return=-356.1 (+1 eps) [worker 1] episodes_seen=590 last_return=-215.6 (+1 eps) [worker 0] episodes_seen=590 last_return=-263.1 (+1 eps) [worker 1] episodes_seen=600 last_return=-410.8 (+1 eps) [worker 0] episodes_seen=600 last_return=-148.3 (+1 eps) [worker 1] episodes_seen=610 last_return=-148.6 (+1 eps) [worker 0] episodes_seen=610 last_return=-206.8 (+1 eps) [worker 1] episodes_seen=620 last_return=-307.7 (+1 eps) [worker 0] episodes_seen=620 last_return=-341.7 (+1 eps) [worker 1] episodes_seen=630 last_return=-156.7 (+1 eps) [worker 0] episodes_seen=630 last_return=-232.9 (+1 eps) [worker 1] episodes_seen=640 last_return=-231.4 (+1 eps) [worker 0] episodes_seen=640 last_return=-222.6 (+1 eps) [worker 1] episodes_seen=650 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=650 last_return=-287.2 (+1 eps) [worker 1] episodes_seen=660 last_return=-107.7 (+1 eps) [worker 0] episodes_seen=660 last_return=-262.5 (+1 eps) [worker 1] episodes_seen=670 last_return=-337.4 (+1 eps) 
[worker 0] episodes_seen=670 last_return=-215.6 (+1 eps) [worker 1] episodes_seen=680 last_return=-132.8 (+1 eps) [worker 0] episodes_seen=680 last_return=-228.7 (+1 eps) [worker 1] episodes_seen=690 last_return=-236.7 (+1 eps) [worker 0] episodes_seen=690 last_return=-145.0 (+1 eps) [worker 1] episodes_seen=700 last_return=-107.6 (+1 eps) [worker 0] episodes_seen=700 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=710 last_return=-178.4 (+1 eps) [worker 0] episodes_seen=710 last_return=-348.2 (+1 eps) [worker 0] episodes_seen=720 last_return=-290.3 (+1 eps) [worker 1] episodes_seen=720 last_return=-294.1 (+1 eps) [worker 1] episodes_seen=730 last_return=-150.9 (+1 eps) [worker 0] episodes_seen=730 last_return=-174.2 (+1 eps) [worker 1] episodes_seen=740 last_return=-301.7 (+1 eps) [worker 0] episodes_seen=740 last_return=-205.8 (+1 eps) [A2C][sync] it= 3001 steps= 180060 (+ 60) avg10=-237.67 loss=628681.812 pg=-0.000 vf=967202.812 H=0.000 gn=494837.844 [worker 1] episodes_seen=750 last_return=-158.8 (+1 eps) [worker 0] episodes_seen=750 last_return=-252.8 (+1 eps) [worker 1] episodes_seen=760 last_return=-418.7 (+1 eps) [worker 0] episodes_seen=760 last_return=-352.7 (+1 eps) [worker 1] episodes_seen=770 last_return=-351.4 (+1 eps) [worker 0] episodes_seen=770 last_return=-210.8 (+1 eps) [worker 1] episodes_seen=780 last_return=-213.5 (+1 eps) [worker 0] episodes_seen=780 last_return=-279.7 (+1 eps) [worker 1] episodes_seen=790 last_return=-264.7 (+1 eps) [worker 0] episodes_seen=790 last_return=-305.4 (+1 eps) [worker 0] episodes_seen=800 last_return=-145.8 (+1 eps) [worker 1] episodes_seen=800 last_return=-314.8 (+1 eps) [worker 0] episodes_seen=810 last_return=-217.8 (+1 eps) [worker 1] episodes_seen=810 last_return=-292.0 (+1 eps) [worker 0] episodes_seen=820 last_return=-221.2 (+1 eps) [worker 1] episodes_seen=820 last_return=-294.9 (+1 eps) [worker 0] episodes_seen=830 last_return=-212.1 (+1 eps) [worker 1] episodes_seen=830 last_return=-272.2 (+1 eps) 
[worker 0] episodes_seen=840 last_return=-214.1 (+1 eps) [worker 1] episodes_seen=840 last_return=-294.3 (+1 eps) [worker 0] episodes_seen=850 last_return=-197.3 (+1 eps) [worker 1] episodes_seen=850 last_return=-293.5 (+1 eps) [worker 1] episodes_seen=860 last_return=-308.1 (+1 eps) [worker 0] episodes_seen=860 last_return=-207.0 (+1 eps) [worker 1] episodes_seen=870 last_return=-217.2 (+1 eps) [worker 0] episodes_seen=870 last_return=-218.8 (+1 eps) [worker 1] episodes_seen=880 last_return=-258.6 (+1 eps) [worker 0] episodes_seen=880 last_return=-177.2 (+1 eps) [worker 0] episodes_seen=890 last_return=-143.6 (+1 eps) [worker 1] episodes_seen=890 last_return=-110.4 (+1 eps) [worker 0] episodes_seen=900 last_return=-152.9 (+1 eps) [worker 1] episodes_seen=900 last_return=-311.1 (+1 eps) [worker 1] episodes_seen=910 last_return=-286.4 (+1 eps) [worker 0] episodes_seen=910 last_return=-216.3 (+1 eps) [worker 1] episodes_seen=920 last_return=-152.2 (+1 eps) [worker 0] episodes_seen=920 last_return=-137.3 (+1 eps) [worker 1] episodes_seen=930 last_return=-299.4 (+1 eps) [worker 0] episodes_seen=930 last_return=-300.1 (+1 eps) [worker 1] episodes_seen=940 last_return=-211.8 (+1 eps) [worker 0] episodes_seen=940 last_return=-275.4 (+1 eps) [worker 1] episodes_seen=950 last_return=-299.8 (+1 eps) [worker 0] episodes_seen=950 last_return=-312.1 (+1 eps) [worker 1] episodes_seen=960 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=960 last_return=-125.3 (+1 eps) [worker 1] episodes_seen=970 last_return=-327.6 (+1 eps) [worker 0] episodes_seen=970 last_return=-120.1 (+1 eps) [worker 1] episodes_seen=980 last_return=-151.3 (+1 eps) [worker 1] episodes_seen=990 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=980 last_return=-239.5 (+1 eps) [A2C][sync] it= 4001 steps= 240060 (+ 60) avg10=-218.90 loss=332219.062 pg=-0.000 vf=511106.281 H=0.000 gn=279799.344 [worker 1] episodes_seen=1000 last_return=-215.5 (+1 eps) [worker 0] episodes_seen=990 last_return=-235.9 (+1 eps) 
[worker 0] episodes_seen=1000 last_return=-278.4 (+1 eps) [worker 1] episodes_seen=1010 last_return=-175.2 (+1 eps) [worker 1] episodes_seen=1020 last_return=-108.7 (+1 eps) [worker 0] episodes_seen=1010 last_return=-264.8 (+1 eps) [worker 1] episodes_seen=1030 last_return=-127.8 (+1 eps) [worker 0] episodes_seen=1020 last_return=-119.2 (+1 eps) [worker 1] episodes_seen=1040 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1030 last_return=-147.4 (+1 eps) [worker 1] episodes_seen=1050 last_return=-307.6 (+1 eps) [worker 0] episodes_seen=1040 last_return=-108.4 (+1 eps) [worker 0] episodes_seen=1050 last_return=-360.8 (+1 eps) [worker 1] episodes_seen=1060 last_return=-171.0 (+1 eps) [worker 0] episodes_seen=1060 last_return=-200.5 (+1 eps) [worker 1] episodes_seen=1070 last_return=-129.3 (+1 eps) [worker 0] episodes_seen=1070 last_return=-305.1 (+1 eps) [worker 1] episodes_seen=1080 last_return=-355.0 (+1 eps) [worker 1] episodes_seen=1090 last_return=-271.0 (+1 eps) [worker 0] episodes_seen=1080 last_return=-112.5 (+1 eps) [worker 1] episodes_seen=1100 last_return=-205.2 (+1 eps) [worker 0] episodes_seen=1090 last_return=-367.3 (+1 eps) [worker 0] episodes_seen=1100 last_return=-255.9 (+1 eps) [worker 1] episodes_seen=1110 last_return=-337.2 (+1 eps) [worker 0] episodes_seen=1110 last_return=-247.1 (+1 eps) [worker 1] episodes_seen=1120 last_return=-212.3 (+1 eps) [worker 0] episodes_seen=1120 last_return=-217.5 (+1 eps) [worker 1] episodes_seen=1130 last_return=-270.1 (+1 eps) [worker 0] episodes_seen=1130 last_return=-268.6 (+1 eps) [worker 1] episodes_seen=1140 last_return=-193.1 (+1 eps) [worker 0] episodes_seen=1140 last_return=-141.6 (+1 eps) [worker 1] episodes_seen=1150 last_return=-343.0 (+1 eps) [worker 0] episodes_seen=1150 last_return=-283.2 (+1 eps) [worker 0] episodes_seen=1160 last_return=-239.0 (+1 eps) [worker 1] episodes_seen=1160 last_return=-175.2 (+1 eps) [worker 1] episodes_seen=1170 last_return=-100.0 (+1 eps) [worker 0] 
episodes_seen=1170 last_return=-228.0 (+1 eps) [worker 1] episodes_seen=1180 last_return=-276.7 (+1 eps) [worker 0] episodes_seen=1180 last_return=-252.8 (+1 eps) [worker 1] episodes_seen=1190 last_return=-378.0 (+1 eps) [worker 0] episodes_seen=1190 last_return=-243.5 (+1 eps) [worker 1] episodes_seen=1200 last_return=-314.3 (+1 eps) [worker 0] episodes_seen=1200 last_return=-201.3 (+1 eps) [worker 1] episodes_seen=1210 last_return=-173.5 (+1 eps) [worker 0] episodes_seen=1210 last_return=-317.7 (+1 eps) [worker 1] episodes_seen=1220 last_return=-276.5 (+1 eps) [worker 0] episodes_seen=1220 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1230 last_return=-125.4 (+1 eps) [worker 0] episodes_seen=1230 last_return=-209.4 (+1 eps) [worker 1] episodes_seen=1240 last_return=-246.3 (+1 eps) [A2C][sync] it= 5001 steps= 300060 (+ 60) avg10=-218.16 loss=505.287 pg=-0.000 vf=777.364 H=0.000 gn=12605.957 [worker 0] episodes_seen=1240 last_return=-150.4 (+1 eps) [worker 0] episodes_seen=1250 last_return=-332.5 (+1 eps) [worker 1] episodes_seen=1250 last_return=-209.3 (+1 eps) [worker 1] episodes_seen=1260 last_return=-145.1 (+1 eps) [worker 0] episodes_seen=1260 last_return=-126.3 (+1 eps) [worker 1] episodes_seen=1270 last_return=-185.0 (+1 eps) [worker 0] episodes_seen=1270 last_return=-242.6 (+1 eps) [worker 1] episodes_seen=1280 last_return=-254.8 (+1 eps) [worker 0] episodes_seen=1280 last_return=-458.5 (+1 eps) [worker 1] episodes_seen=1290 last_return=-332.2 (+1 eps) [worker 0] episodes_seen=1290 last_return=-166.3 (+1 eps) [worker 1] episodes_seen=1300 last_return=-467.5 (+1 eps) [worker 0] episodes_seen=1300 last_return=-245.7 (+1 eps) [worker 1] episodes_seen=1310 last_return=-277.2 (+1 eps) [worker 0] episodes_seen=1310 last_return=-190.7 (+1 eps) [worker 0] episodes_seen=1320 last_return=-278.9 (+1 eps) [worker 1] episodes_seen=1320 last_return=-138.3 (+1 eps) [worker 0] episodes_seen=1330 last_return=-220.8 (+1 eps) [worker 1] episodes_seen=1330 
last_return=-144.1 (+1 eps) [worker 0] episodes_seen=1340 last_return=-324.5 (+1 eps) [worker 1] episodes_seen=1340 last_return=-184.7 (+1 eps) [worker 0] episodes_seen=1350 last_return=-313.7 (+1 eps) [worker 0] episodes_seen=1360 last_return=-183.3 (+1 eps) [worker 1] episodes_seen=1350 last_return=-174.8 (+1 eps) [worker 1] episodes_seen=1360 last_return=-220.4 (+1 eps) [worker 0] episodes_seen=1370 last_return=-235.9 (+1 eps) [worker 1] episodes_seen=1370 last_return=-146.9 (+1 eps) [worker 0] episodes_seen=1380 last_return=-228.8 (+1 eps) [worker 1] episodes_seen=1380 last_return=-224.0 (+1 eps) [worker 0] episodes_seen=1390 last_return=-277.4 (+1 eps) [worker 1] episodes_seen=1390 last_return=-224.8 (+1 eps) [worker 0] episodes_seen=1400 last_return=-132.4 (+1 eps) [worker 1] episodes_seen=1400 last_return=-134.1 (+1 eps) [worker 0] episodes_seen=1410 last_return=-252.9 (+1 eps) [worker 0] episodes_seen=1420 last_return=-185.6 (+1 eps) [worker 1] episodes_seen=1410 last_return=-116.5 (+1 eps) [worker 1] episodes_seen=1420 last_return=-183.5 (+1 eps) [worker 0] episodes_seen=1430 last_return=-262.1 (+1 eps) [worker 0] episodes_seen=1440 last_return=-271.3 (+1 eps) [worker 1] episodes_seen=1430 last_return=-327.9 (+1 eps) [worker 0] episodes_seen=1450 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1440 last_return=-278.9 (+1 eps) [worker 0] episodes_seen=1460 last_return=-384.3 (+1 eps) [worker 1] episodes_seen=1450 last_return=-215.6 (+1 eps) [worker 0] episodes_seen=1470 last_return=-294.0 (+1 eps) [worker 1] episodes_seen=1460 last_return=-112.1 (+1 eps) [worker 0] episodes_seen=1480 last_return=-287.4 (+1 eps) [worker 1] episodes_seen=1470 last_return=-53.7 (+1 eps) [worker 0] episodes_seen=1490 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1480 last_return=-213.3 (+1 eps) [A2C][sync] it= 6001 steps= 360060 (+ 60) avg10=-271.11 loss=209007.453 pg=-0.002 vf=321549.938 H=0.020 gn=100296.258 [worker 0] episodes_seen=1500 last_return=-218.8 (+1 
eps) [worker 1] episodes_seen=1490 last_return=-191.9 (+1 eps) [worker 0] episodes_seen=1510 last_return=-220.8 (+1 eps) [worker 1] episodes_seen=1500 last_return=-375.5 (+1 eps) [worker 0] episodes_seen=1520 last_return=-416.5 (+1 eps) [worker 1] episodes_seen=1510 last_return=-110.8 (+1 eps) [worker 0] episodes_seen=1530 last_return=-237.5 (+1 eps) [worker 1] episodes_seen=1520 last_return=-302.3 (+1 eps) [worker 0] episodes_seen=1540 last_return=-139.8 (+1 eps) [worker 1] episodes_seen=1530 last_return=-181.1 (+1 eps) [worker 0] episodes_seen=1550 last_return=-330.7 (+1 eps) [worker 1] episodes_seen=1540 last_return=-111.1 (+1 eps) [worker 0] episodes_seen=1560 last_return=-301.3 (+1 eps) [worker 1] episodes_seen=1550 last_return=-238.5 (+1 eps) [worker 0] episodes_seen=1570 last_return=-327.0 (+1 eps) [worker 1] episodes_seen=1560 last_return=-317.1 (+1 eps) [worker 0] episodes_seen=1580 last_return=-193.2 (+1 eps) [worker 1] episodes_seen=1570 last_return=-265.8 (+1 eps) [worker 0] episodes_seen=1590 last_return=-318.2 (+1 eps) [worker 1] episodes_seen=1580 last_return=-338.5 (+1 eps) [worker 0] episodes_seen=1600 last_return=-161.1 (+1 eps) [worker 1] episodes_seen=1590 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1610 last_return=-415.1 (+1 eps) [worker 1] episodes_seen=1600 last_return=-410.8 (+1 eps) [worker 0] episodes_seen=1620 last_return=-313.8 (+1 eps) [worker 1] episodes_seen=1610 last_return=-129.5 (+1 eps) [worker 0] episodes_seen=1630 last_return=-132.7 (+1 eps) [worker 1] episodes_seen=1620 last_return=-127.5 (+1 eps) [worker 0] episodes_seen=1640 last_return=-323.1 (+1 eps) [worker 1] episodes_seen=1630 last_return=-244.0 (+1 eps) [worker 0] episodes_seen=1650 last_return=-383.2 (+1 eps) [worker 1] episodes_seen=1640 last_return=-202.9 (+1 eps) [worker 0] episodes_seen=1660 last_return=-127.5 (+1 eps) [worker 1] episodes_seen=1650 last_return=-199.1 (+1 eps) [worker 0] episodes_seen=1670 last_return=-440.1 (+1 eps) [worker 1] 
episodes_seen=1660 last_return=-26.9 (+1 eps) [worker 0] episodes_seen=1680 last_return=-361.8 (+1 eps) [worker 1] episodes_seen=1670 last_return=-403.0 (+1 eps) [worker 0] episodes_seen=1690 last_return=-182.2 (+1 eps) [worker 1] episodes_seen=1680 last_return=-224.4 (+1 eps) [worker 0] episodes_seen=1700 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1690 last_return=-281.2 (+1 eps) [worker 0] episodes_seen=1710 last_return=-373.0 (+1 eps) [worker 1] episodes_seen=1700 last_return=-291.6 (+1 eps) [worker 0] episodes_seen=1720 last_return=-316.1 (+1 eps) [worker 1] episodes_seen=1710 last_return=-314.8 (+1 eps) [worker 0] episodes_seen=1730 last_return=-364.8 (+1 eps) [worker 1] episodes_seen=1720 last_return=-345.1 (+1 eps) [worker 0] episodes_seen=1740 last_return=-227.3 (+1 eps) [worker 1] episodes_seen=1730 last_return=-199.6 (+1 eps) [worker 0] episodes_seen=1750 last_return=-438.7 (+1 eps) [worker 1] episodes_seen=1740 last_return=-16.9 (+1 eps) [worker 0] episodes_seen=1760 last_return=-377.9 (+1 eps) [worker 1] episodes_seen=1750 last_return=-353.2 (+1 eps) [worker 0] episodes_seen=1770 last_return=-399.0 (+1 eps) [worker 1] episodes_seen=1760 last_return=-378.4 (+1 eps) [worker 0] episodes_seen=1780 last_return=-420.7 (+1 eps) [worker 1] episodes_seen=1770 last_return=-153.4 (+1 eps) [worker 0] episodes_seen=1790 last_return=-175.6 (+1 eps) [worker 1] episodes_seen=1780 last_return=-149.7 (+1 eps) [worker 0] episodes_seen=1800 last_return=-475.1 (+1 eps) [worker 1] episodes_seen=1790 last_return=-166.4 (+1 eps) [worker 1] episodes_seen=1800 last_return=-182.6 (+1 eps) [worker 0] episodes_seen=1810 last_return=-18.0 (+1 eps) [worker 1] episodes_seen=1810 last_return=-374.9 (+1 eps) [worker 0] episodes_seen=1820 last_return=-377.2 (+1 eps) [worker 1] episodes_seen=1820 last_return=-239.6 (+1 eps) [worker 0] episodes_seen=1830 last_return=-343.3 (+1 eps) [worker 1] episodes_seen=1830 last_return=-45.1 (+1 eps) [worker 0] episodes_seen=1840 
last_return=-82.6 (+1 eps) [worker 1] episodes_seen=1840 last_return=-410.9 (+1 eps) [worker 0] episodes_seen=1850 last_return=-148.6 (+1 eps) [worker 1] episodes_seen=1850 last_return=-242.9 (+1 eps) [worker 0] episodes_seen=1860 last_return=-151.9 (+1 eps) [worker 1] episodes_seen=1860 last_return=-133.6 (+1 eps) [worker 0] episodes_seen=1870 last_return=-215.5 (+1 eps) [worker 0] episodes_seen=1880 last_return=-206.9 (+1 eps) [worker 1] episodes_seen=1870 last_return=-243.6 (+1 eps) [worker 0] episodes_seen=1890 last_return=-141.6 (+1 eps) [worker 1] episodes_seen=1880 last_return=-144.4 (+1 eps) [worker 1] episodes_seen=1890 last_return=-228.3 (+1 eps) [worker 0] episodes_seen=1900 last_return=-218.2 (+1 eps) [worker 1] episodes_seen=1900 last_return=-195.4 (+1 eps) [worker 0] episodes_seen=1910 last_return=-333.3 (+1 eps) [worker 1] episodes_seen=1910 last_return=-470.1 (+1 eps) [worker 0] episodes_seen=1920 last_return=-265.5 (+1 eps) [worker 1] episodes_seen=1920 last_return=-223.1 (+1 eps) [worker 0] episodes_seen=1930 last_return=-172.9 (+1 eps) [A2C][sync] it= 7001 steps= 420060 (+ 60) avg10=-249.88 loss=10250.739 pg=-0.016 vf=15770.395 H=0.018 gn=39598.383 [worker 1] episodes_seen=1930 last_return=-245.4 (+1 eps) [worker 0] episodes_seen=1940 last_return=-168.5 (+1 eps) [worker 1] episodes_seen=1940 last_return=-443.9 (+1 eps) [worker 0] episodes_seen=1950 last_return=-256.0 (+1 eps) [worker 1] episodes_seen=1950 last_return=-356.3 (+1 eps) [worker 0] episodes_seen=1960 last_return=-326.2 (+1 eps) [worker 1] episodes_seen=1960 last_return=-200.0 (+1 eps) [worker 0] episodes_seen=1970 last_return=-373.4 (+1 eps) [worker 1] episodes_seen=1970 last_return=-225.9 (+1 eps) [worker 0] episodes_seen=1980 last_return=-113.3 (+1 eps) [worker 0] episodes_seen=1990 last_return=-329.5 (+1 eps) [worker 1] episodes_seen=1980 last_return=-221.6 (+1 eps) [worker 0] episodes_seen=2000 last_return=-113.1 (+1 eps) [worker 1] episodes_seen=1990 last_return=-292.6 (+1 eps) 
[worker 0] episodes_seen=2010 last_return=-255.4 (+1 eps) [worker 1] episodes_seen=2000 last_return=-461.7 (+1 eps) [worker 0] episodes_seen=2020 last_return=-423.1 (+1 eps) [worker 1] episodes_seen=2010 last_return=-261.5 (+1 eps) [worker 0] episodes_seen=2030 last_return=-242.8 (+1 eps) [worker 1] episodes_seen=2020 last_return=-414.3 (+1 eps) [worker 0] episodes_seen=2040 last_return=-229.7 (+1 eps) [worker 1] episodes_seen=2030 last_return=-134.8 (+1 eps) [worker 0] episodes_seen=2050 last_return=-303.8 (+1 eps) [worker 1] episodes_seen=2040 last_return=-319.3 (+1 eps) [worker 0] episodes_seen=2060 last_return=-275.1 (+1 eps) [worker 1] episodes_seen=2050 last_return=-385.2 (+1 eps) [worker 0] episodes_seen=2070 last_return=-250.8 (+1 eps) [worker 1] episodes_seen=2060 last_return=-235.4 (+1 eps) [worker 0] episodes_seen=2080 last_return=-306.1 (+1 eps) [worker 1] episodes_seen=2070 last_return=-81.7 (+1 eps) [worker 0] episodes_seen=2090 last_return=-357.7 (+1 eps) [worker 1] episodes_seen=2080 last_return=-175.7 (+1 eps) [worker 0] episodes_seen=2100 last_return=-305.9 (+1 eps) [worker 1] episodes_seen=2090 last_return=-421.9 (+1 eps) [worker 0] episodes_seen=2110 last_return=0.1 (+1 eps) [worker 1] episodes_seen=2100 last_return=-336.2 (+1 eps) [worker 0] episodes_seen=2120 last_return=-476.2 (+1 eps) [worker 1] episodes_seen=2110 last_return=-302.8 (+1 eps) [worker 0] episodes_seen=2130 last_return=-199.2 (+1 eps) [worker 1] episodes_seen=2120 last_return=-281.6 (+1 eps) [worker 0] episodes_seen=2140 last_return=-197.3 (+1 eps) [worker 1] episodes_seen=2130 last_return=-131.9 (+1 eps) [worker 0] episodes_seen=2150 last_return=-362.7 (+1 eps) [worker 1] episodes_seen=2140 last_return=-315.9 (+1 eps) [worker 0] episodes_seen=2160 last_return=-44.9 (+1 eps) [worker 1] episodes_seen=2150 last_return=-317.3 (+1 eps) [worker 0] episodes_seen=2170 last_return=-225.3 (+1 eps) [worker 1] episodes_seen=2160 last_return=-310.7 (+1 eps) [worker 0] episodes_seen=2180 
last_return=-282.8 (+1 eps) [worker 1] episodes_seen=2170 last_return=-320.4 (+1 eps) [worker 0] episodes_seen=2190 last_return=-197.2 (+1 eps) [worker 1] episodes_seen=2180 last_return=-316.8 (+1 eps) [worker 0] episodes_seen=2200 last_return=-339.3 (+1 eps) [worker 1] episodes_seen=2190 last_return=-192.0 (+1 eps) [worker 0] episodes_seen=2210 last_return=-174.1 (+1 eps) [worker 1] episodes_seen=2200 last_return=-312.1 (+1 eps) [worker 0] episodes_seen=2220 last_return=-282.7 (+1 eps) [worker 1] episodes_seen=2210 last_return=-115.0 (+1 eps) [worker 0] episodes_seen=2230 last_return=-145.0 (+1 eps) [worker 1] episodes_seen=2220 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2240 last_return=-241.0 (+1 eps) [worker 1] episodes_seen=2230 last_return=-151.4 (+1 eps) [worker 0] episodes_seen=2250 last_return=-267.8 (+1 eps) [worker 1] episodes_seen=2240 last_return=-226.5 (+1 eps) [worker 0] episodes_seen=2260 last_return=-258.5 (+1 eps) [worker 1] episodes_seen=2250 last_return=-255.6 (+1 eps) [worker 0] episodes_seen=2270 last_return=133.7 (+1 eps) [worker 1] episodes_seen=2260 last_return=-231.1 (+1 eps) [worker 0] episodes_seen=2280 last_return=-174.0 (+1 eps) [worker 1] episodes_seen=2270 last_return=-171.0 (+1 eps) [worker 0] episodes_seen=2290 last_return=-221.6 (+1 eps) [worker 1] episodes_seen=2280 last_return=-200.9 (+1 eps) [worker 0] episodes_seen=2300 last_return=-409.5 (+1 eps) [worker 1] episodes_seen=2290 last_return=-230.6 (+1 eps) [worker 0] episodes_seen=2310 last_return=-110.9 (+1 eps) [worker 1] episodes_seen=2300 last_return=-328.8 (+1 eps) [worker 0] episodes_seen=2320 last_return=-135.9 (+1 eps) [worker 1] episodes_seen=2310 last_return=-383.7 (+1 eps) [worker 0] episodes_seen=2330 last_return=-301.2 (+1 eps) [worker 1] episodes_seen=2320 last_return=-379.3 (+1 eps) [worker 0] episodes_seen=2340 last_return=-418.8 (+1 eps) [worker 1] episodes_seen=2330 last_return=-161.8 (+1 eps) [worker 1] episodes_seen=2340 last_return=-237.7 (+1 eps) 
[worker 0] episodes_seen=2350 last_return=31.2 (+1 eps) [worker 1] episodes_seen=2350 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2360 last_return=-119.3 (+1 eps) [worker 1] episodes_seen=2360 last_return=-233.7 (+1 eps) [worker 0] episodes_seen=2370 last_return=-323.0 (+1 eps) [worker 1] episodes_seen=2370 last_return=-359.0 (+1 eps) [worker 0] episodes_seen=2380 last_return=-345.0 (+1 eps) [A2C][sync] it= 8001 steps= 480060 (+ 60) avg10=-287.34 loss=19216.773 pg=0.000 vf=29564.268 H=0.001 gn=14750.963 [worker 1] episodes_seen=2380 last_return=-195.2 (+1 eps) [worker 0] episodes_seen=2390 last_return=-215.3 (+1 eps) [worker 1] episodes_seen=2390 last_return=-71.6 (+1 eps) [worker 0] episodes_seen=2400 last_return=-27.5 (+1 eps) [worker 1] episodes_seen=2400 last_return=-363.4 (+1 eps) [worker 0] episodes_seen=2410 last_return=-243.5 (+1 eps) [worker 1] episodes_seen=2410 last_return=-327.7 (+1 eps) [worker 0] episodes_seen=2420 last_return=-319.5 (+1 eps) [worker 1] episodes_seen=2420 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2430 last_return=-334.3 (+1 eps) [worker 1] episodes_seen=2430 last_return=-333.1 (+1 eps) [worker 0] episodes_seen=2440 last_return=-152.6 (+1 eps) [worker 1] episodes_seen=2440 last_return=-290.7 (+1 eps) [worker 0] episodes_seen=2450 last_return=-178.7 (+1 eps) [worker 1] episodes_seen=2450 last_return=-125.6 (+1 eps) [worker 0] episodes_seen=2460 last_return=-205.5 (+1 eps) [worker 1] episodes_seen=2460 last_return=-294.5 (+1 eps) [worker 0] episodes_seen=2470 last_return=-230.8 (+1 eps) [worker 1] episodes_seen=2470 last_return=-228.7 (+1 eps) [worker 0] episodes_seen=2480 last_return=-133.3 (+1 eps) [worker 1] episodes_seen=2480 last_return=-291.2 (+1 eps) [worker 0] episodes_seen=2490 last_return=-356.5 (+1 eps) [worker 1] episodes_seen=2490 last_return=-201.4 (+1 eps) [worker 0] episodes_seen=2500 last_return=-299.9 (+1 eps) [worker 1] episodes_seen=2500 last_return=-193.9 (+1 eps) [worker 0] episodes_seen=2510 
last_return=-380.8 (+1 eps) [worker 1] episodes_seen=2510 last_return=-335.7 (+1 eps) [worker 0] episodes_seen=2520 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2520 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2530 last_return=-325.5 (+1 eps) [worker 1] episodes_seen=2530 last_return=-324.2 (+1 eps) [worker 0] episodes_seen=2540 last_return=-287.8 (+1 eps) [worker 1] episodes_seen=2540 last_return=-257.2 (+1 eps) [worker 0] episodes_seen=2550 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2550 last_return=-459.1 (+1 eps) [worker 0] episodes_seen=2560 last_return=-157.2 (+1 eps) [worker 1] episodes_seen=2560 last_return=-342.8 (+1 eps) [worker 0] episodes_seen=2570 last_return=-160.9 (+1 eps) [worker 1] episodes_seen=2570 last_return=-323.8 (+1 eps) [worker 0] episodes_seen=2580 last_return=-209.3 (+1 eps) [worker 1] episodes_seen=2580 last_return=-380.9 (+1 eps) [worker 0] episodes_seen=2590 last_return=-226.5 (+1 eps) [worker 1] episodes_seen=2590 last_return=-297.5 (+1 eps) [worker 0] episodes_seen=2600 last_return=-323.3 (+1 eps) [worker 1] episodes_seen=2600 last_return=-316.4 (+1 eps) [worker 0] episodes_seen=2610 last_return=-259.0 (+1 eps) [worker 1] episodes_seen=2610 last_return=-213.5 (+1 eps) [worker 0] episodes_seen=2620 last_return=-315.9 (+1 eps) [worker 1] episodes_seen=2620 last_return=-117.0 (+1 eps) [worker 0] episodes_seen=2630 last_return=-147.4 (+1 eps) [worker 1] episodes_seen=2630 last_return=-183.4 (+1 eps) [worker 0] episodes_seen=2640 last_return=-357.7 (+1 eps) [worker 1] episodes_seen=2640 last_return=-456.2 (+1 eps) [worker 0] episodes_seen=2650 last_return=-138.3 (+1 eps) [worker 1] episodes_seen=2650 last_return=-202.7 (+1 eps) [worker 0] episodes_seen=2660 last_return=-196.6 (+1 eps) [worker 1] episodes_seen=2660 last_return=-403.0 (+1 eps) [worker 0] episodes_seen=2670 last_return=-317.0 (+1 eps) [worker 1] episodes_seen=2670 last_return=-167.4 (+1 eps) [worker 0] episodes_seen=2680 last_return=-252.1 (+1 eps) 
[worker 1] episodes_seen=2680 last_return=-71.2 (+1 eps) [worker 0] episodes_seen=2690 last_return=-285.0 (+1 eps) [worker 1] episodes_seen=2690 last_return=-303.2 (+1 eps) [worker 0] episodes_seen=2700 last_return=-144.3 (+1 eps) [worker 1] episodes_seen=2700 last_return=-163.9 (+1 eps) [worker 0] episodes_seen=2710 last_return=-330.4 (+1 eps) [worker 1] episodes_seen=2710 last_return=-332.9 (+1 eps) [worker 0] episodes_seen=2720 last_return=-211.7 (+1 eps) [worker 1] episodes_seen=2720 last_return=-280.1 (+1 eps) [worker 0] episodes_seen=2730 last_return=-61.3 (+1 eps) [worker 1] episodes_seen=2730 last_return=-391.1 (+1 eps) [worker 0] episodes_seen=2740 last_return=-522.1 (+1 eps) [worker 1] episodes_seen=2740 last_return=-334.0 (+1 eps) [worker 0] episodes_seen=2750 last_return=-313.3 (+1 eps) [worker 1] episodes_seen=2750 last_return=-336.4 (+1 eps) [worker 0] episodes_seen=2760 last_return=-284.8 (+1 eps) [worker 1] episodes_seen=2760 last_return=-174.9 (+1 eps) [worker 0] episodes_seen=2770 last_return=-91.9 (+1 eps) [worker 1] episodes_seen=2770 last_return=-188.0 (+1 eps) [worker 0] episodes_seen=2780 last_return=-205.9 (+1 eps) [worker 1] episodes_seen=2780 last_return=-378.5 (+1 eps) [worker 0] episodes_seen=2790 last_return=-357.1 (+1 eps) [worker 1] episodes_seen=2790 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2800 last_return=-363.6 (+1 eps) [worker 1] episodes_seen=2800 last_return=-165.2 (+1 eps) [worker 0] episodes_seen=2810 last_return=-460.7 (+1 eps) [worker 1] episodes_seen=2810 last_return=-140.2 (+1 eps) [worker 0] episodes_seen=2820 last_return=-71.3 (+1 eps) [worker 1] episodes_seen=2820 last_return=-264.5 (+1 eps) [worker 0] episodes_seen=2830 last_return=-379.0 (+1 eps) [A2C][sync] it= 9001 steps= 540060 (+ 60) avg10=-237.41 loss=671.501 pg=0.000 vf=1033.078 H=0.000 gn=7912.256 [worker 1] episodes_seen=2830 last_return=-429.1 (+1 eps) [worker 0] episodes_seen=2840 last_return=-137.1 (+1 eps) [worker 1] episodes_seen=2840 
last_return=-141.1 (+1 eps) [worker 0] episodes_seen=2850 last_return=-126.4 (+1 eps) [worker 1] episodes_seen=2850 last_return=-314.9 (+1 eps) [worker 0] episodes_seen=2860 last_return=-69.2 (+1 eps) [worker 1] episodes_seen=2860 last_return=-91.2 (+1 eps) [worker 0] episodes_seen=2870 last_return=-223.9 (+1 eps) [worker 1] episodes_seen=2870 last_return=-317.7 (+1 eps) [worker 0] episodes_seen=2880 last_return=-135.0 (+1 eps) [worker 1] episodes_seen=2880 last_return=-155.2 (+1 eps) [worker 0] episodes_seen=2890 last_return=-265.4 (+1 eps) [worker 1] episodes_seen=2890 last_return=-79.1 (+1 eps) [worker 0] episodes_seen=2900 last_return=-190.5 (+1 eps) [worker 1] episodes_seen=2900 last_return=-261.5 (+1 eps) [worker 0] episodes_seen=2910 last_return=-195.9 (+1 eps) [worker 1] episodes_seen=2910 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2920 last_return=-534.3 (+1 eps) [worker 1] episodes_seen=2920 last_return=-353.5 (+1 eps) [worker 0] episodes_seen=2930 last_return=-289.3 (+1 eps) [worker 1] episodes_seen=2930 last_return=-417.4 (+1 eps) [worker 1] episodes_seen=2940 last_return=-403.0 (+1 eps) [worker 0] episodes_seen=2940 last_return=-215.1 (+1 eps) [worker 1] episodes_seen=2950 last_return=-407.0 (+1 eps) [worker 0] episodes_seen=2950 last_return=-113.8 (+1 eps) [worker 1] episodes_seen=2960 last_return=-416.6 (+1 eps) [worker 0] episodes_seen=2960 last_return=-295.9 (+1 eps) [worker 1] episodes_seen=2970 last_return=-245.4 (+1 eps) [worker 0] episodes_seen=2970 last_return=-221.6 (+1 eps) [worker 1] episodes_seen=2980 last_return=-308.0 (+1 eps) [worker 0] episodes_seen=2980 last_return=-217.3 (+1 eps) [worker 1] episodes_seen=2990 last_return=-86.9 (+1 eps) [worker 0] episodes_seen=2990 last_return=-270.6 (+1 eps) [worker 1] episodes_seen=3000 last_return=-347.5 (+1 eps) [worker 0] episodes_seen=3000 last_return=-161.0 (+1 eps) [worker 0] episodes_seen=3010 last_return=-258.3 (+1 eps) [worker 1] episodes_seen=3010 last_return=-128.4 (+1 eps) 
[worker 0] episodes_seen=3020 last_return=-357.0 (+1 eps) [worker 1] episodes_seen=3020 last_return=-436.5 (+1 eps) [worker 0] episodes_seen=3030 last_return=-292.5 (+1 eps) [worker 1] episodes_seen=3030 last_return=-153.5 (+1 eps) [worker 0] episodes_seen=3040 last_return=-293.6 (+1 eps) [worker 1] episodes_seen=3040 last_return=-210.1 (+1 eps) [worker 1] episodes_seen=3050 last_return=-294.9 (+1 eps) [worker 0] episodes_seen=3050 last_return=-271.2 (+1 eps) [worker 0] episodes_seen=3060 last_return=-291.3 (+1 eps) [worker 1] episodes_seen=3060 last_return=-310.8 (+1 eps) [worker 0] episodes_seen=3070 last_return=-45.5 (+1 eps) [worker 1] episodes_seen=3070 last_return=-318.7 (+1 eps) [worker 0] episodes_seen=3080 last_return=-351.8 (+1 eps) [worker 1] episodes_seen=3080 last_return=-66.7 (+1 eps) [worker 0] episodes_seen=3090 last_return=-279.7 (+1 eps) [worker 1] episodes_seen=3090 last_return=-451.6 (+1 eps) [worker 0] episodes_seen=3100 last_return=-428.5 (+1 eps) [worker 1] episodes_seen=3100 last_return=-543.8 (+1 eps) [worker 0] episodes_seen=3110 last_return=-135.5 (+1 eps) [worker 1] episodes_seen=3110 last_return=-326.6 (+1 eps) [worker 0] episodes_seen=3120 last_return=-350.9 (+1 eps) [worker 1] episodes_seen=3120 last_return=-45.1 (+1 eps) [worker 0] episodes_seen=3130 last_return=-266.1 (+1 eps) [worker 1] episodes_seen=3130 last_return=-224.1 (+1 eps) [worker 0] episodes_seen=3140 last_return=-432.3 (+1 eps) [worker 1] episodes_seen=3140 last_return=-332.0 (+1 eps) [worker 0] episodes_seen=3150 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=3150 last_return=-378.4 (+1 eps) [worker 0] episodes_seen=3160 last_return=-389.7 (+1 eps) [worker 1] episodes_seen=3160 last_return=-243.4 (+1 eps) [worker 0] episodes_seen=3170 last_return=-350.5 (+1 eps) [worker 1] episodes_seen=3170 last_return=-340.4 (+1 eps) [worker 0] episodes_seen=3180 last_return=-154.2 (+1 eps) [worker 1] episodes_seen=3180 last_return=-504.2 (+1 eps) [worker 0] episodes_seen=3190 
last_return=-222.2 (+1 eps) [worker 1] episodes_seen=3190 last_return=-160.5 (+1 eps) [worker 0] episodes_seen=3200 last_return=-215.8 (+1 eps) [worker 1] episodes_seen=3200 last_return=-455.3 (+1 eps) [worker 0] episodes_seen=3210 last_return=-430.8 (+1 eps) [worker 1] episodes_seen=3210 last_return=-243.4 (+1 eps) [worker 0] episodes_seen=3220 last_return=94.5 (+1 eps) [worker 1] episodes_seen=3220 last_return=-306.7 (+1 eps) [worker 0] episodes_seen=3230 last_return=-179.7 (+1 eps) [worker 1] episodes_seen=3230 last_return=-266.2 (+1 eps) [worker 0] episodes_seen=3240 last_return=-229.0 (+1 eps) [worker 1] episodes_seen=3240 last_return=-305.7 (+1 eps) [worker 0] episodes_seen=3250 last_return=-112.3 (+1 eps) [worker 1] episodes_seen=3250 last_return=-219.8 (+1 eps) [worker 0] episodes_seen=3260 last_return=-341.7 (+1 eps) [worker 1] episodes_seen=3260 last_return=-290.3 (+1 eps) [worker 0] episodes_seen=3270 last_return=-199.6 (+1 eps) [worker 1] episodes_seen=3270 last_return=-409.1 (+1 eps) [worker 0] episodes_seen=3280 last_return=-237.9 (+1 eps) [A2C][sync] it=10001 steps= 600060 (+ 60) avg10=-250.00 loss=80.400 pg=0.003 vf=123.688 H=0.012 gn=1894.437 [worker 1] episodes_seen=3280 last_return=-166.1 (+1 eps) [worker 0] episodes_seen=3290 last_return=-210.0 (+1 eps) [worker 1] episodes_seen=3290 last_return=-133.0 (+1 eps) [worker 0] episodes_seen=3300 last_return=-209.1 (+1 eps) [worker 1] episodes_seen=3300 last_return=-385.0 (+1 eps) [worker 0] episodes_seen=3310 last_return=-196.5 (+1 eps) [worker 1] episodes_seen=3310 last_return=-116.2 (+1 eps) [worker 0] episodes_seen=3320 last_return=-171.9 (+1 eps) [worker 1] episodes_seen=3320 last_return=-355.0 (+1 eps) [worker 0] episodes_seen=3330 last_return=-183.6 (+1 eps) [worker 1] episodes_seen=3330 last_return=-455.8 (+1 eps) [worker 0] episodes_seen=3340 last_return=-388.3 (+1 eps) [worker 1] episodes_seen=3340 last_return=-155.3 (+1 eps) [worker 0] episodes_seen=3350 last_return=-195.3 (+1 eps) [worker 
1] episodes_seen=3350 last_return=-259.7 (+1 eps) [worker 0] episodes_seen=3360 last_return=-347.9 (+1 eps) [worker 1] episodes_seen=3360 last_return=-257.9 (+1 eps) [worker 0] episodes_seen=3370 last_return=-217.8 (+1 eps) [worker 1] episodes_seen=3370 last_return=-327.1 (+1 eps) [worker 0] episodes_seen=3380 last_return=-269.7 (+1 eps) [worker 1] episodes_seen=3380 last_return=-125.8 (+1 eps) [worker 0] episodes_seen=3390 last_return=-111.1 (+1 eps) [worker 1] episodes_seen=3390 last_return=-482.0 (+1 eps) [worker 0] episodes_seen=3400 last_return=-386.5 (+1 eps) [worker 1] episodes_seen=3400 last_return=-216.9 (+1 eps) [worker 0] episodes_seen=3410 last_return=-277.3 (+1 eps) [worker 1] episodes_seen=3410 last_return=-213.2 (+1 eps) [worker 0] episodes_seen=3420 last_return=-409.6 (+1 eps) [worker 1] episodes_seen=3420 last_return=-456.4 (+1 eps) [worker 0] episodes_seen=3430 last_return=-26.1 (+1 eps) [worker 1] episodes_seen=3430 last_return=-272.9 (+1 eps) [worker 0] episodes_seen=3440 last_return=-237.7 (+1 eps) [worker 1] episodes_seen=3440 last_return=-88.8 (+1 eps) [worker 0] episodes_seen=3450 last_return=-359.2 (+1 eps) [worker 1] episodes_seen=3450 last_return=-247.3 (+1 eps) [worker 0] episodes_seen=3460 last_return=-302.1 (+1 eps) [worker 1] episodes_seen=3460 last_return=-113.9 (+1 eps) [worker 0] episodes_seen=3470 last_return=-236.5 (+1 eps) [worker 1] episodes_seen=3470 last_return=-247.7 (+1 eps) [worker 0] episodes_seen=3480 last_return=-362.9 (+1 eps) [worker 1] episodes_seen=3480 last_return=-266.7 (+1 eps) [worker 0] episodes_seen=3490 last_return=-143.1 (+1 eps) [worker 1] episodes_seen=3490 last_return=-219.8 (+1 eps) [worker 0] episodes_seen=3500 last_return=-163.7 (+1 eps) [worker 1] episodes_seen=3500 last_return=-300.7 (+1 eps) [worker 0] episodes_seen=3510 last_return=-278.3 (+1 eps) [worker 1] episodes_seen=3510 last_return=-260.4 (+1 eps) [worker 0] episodes_seen=3520 last_return=-330.9 (+1 eps) [worker 1] episodes_seen=3520 
last_return=-183.0 (+1 eps) [worker 0] episodes_seen=3530 last_return=-254.9 (+1 eps) [worker 1] episodes_seen=3530 last_return=-169.7 (+1 eps) [worker 0] episodes_seen=3540 last_return=-227.2 (+1 eps) [worker 1] episodes_seen=3540 last_return=-323.3 (+1 eps) [worker 0] episodes_seen=3550 last_return=-150.1 (+1 eps) [worker 1] episodes_seen=3550 last_return=-211.2 (+1 eps) [worker 0] episodes_seen=3560 last_return=-477.6 (+1 eps) [worker 1] episodes_seen=3560 last_return=-138.0 (+1 eps) [worker 0] episodes_seen=3570 last_return=-427.5 (+1 eps) [worker 1] episodes_seen=3570 last_return=-228.1 (+1 eps) [worker 0] episodes_seen=3580 last_return=-296.3 (+1 eps) [worker 1] episodes_seen=3580 last_return=-306.4 (+1 eps) [worker 0] episodes_seen=3590 last_return=-391.9 (+1 eps) [worker 1] episodes_seen=3590 last_return=-268.3 (+1 eps) [worker 0] episodes_seen=3600 last_return=-287.5 (+1 eps) [worker 1] episodes_seen=3600 last_return=-243.5 (+1 eps) [worker 0] episodes_seen=3610 last_return=-171.7 (+1 eps) [worker 1] episodes_seen=3610 last_return=-251.3 (+1 eps) [worker 0] episodes_seen=3620 last_return=-58.6 (+1 eps) [worker 1] episodes_seen=3620 last_return=-105.1 (+1 eps) [worker 0] episodes_seen=3630 last_return=-156.9 (+1 eps) [worker 1] episodes_seen=3630 last_return=-171.6 (+1 eps) [worker 0] episodes_seen=3640 last_return=-206.0 (+1 eps) [worker 1] episodes_seen=3640 last_return=-363.0 (+1 eps) [worker 0] episodes_seen=3650 last_return=-69.4 (+1 eps) [worker 0] episodes_seen=3660 last_return=-302.7 (+1 eps) [worker 1] episodes_seen=3650 last_return=-475.2 (+1 eps) [worker 1] episodes_seen=3660 last_return=0.8 (+1 eps) [worker 0] episodes_seen=3670 last_return=-138.2 (+1 eps) [worker 1] episodes_seen=3670 last_return=-281.1 (+1 eps) [worker 0] episodes_seen=3680 last_return=-245.3 (+1 eps) [worker 1] episodes_seen=3680 last_return=-22.8 (+1 eps) [worker 0] episodes_seen=3690 last_return=-318.3 (+1 eps) [worker 1] episodes_seen=3690 last_return=-100.0 (+1 eps) 
[worker 0] episodes_seen=3700 last_return=-109.8 (+1 eps) [worker 1] episodes_seen=3700 last_return=-101.7 (+1 eps) [worker 0] episodes_seen=3710 last_return=-96.1 (+1 eps) [worker 1] episodes_seen=3710 last_return=-397.8 (+1 eps) [worker 0] episodes_seen=3720 last_return=-242.5 (+1 eps) [A2C][sync] it=11001 steps= 660060 (+ 60) avg10=-253.59 loss=15175.363 pg=-0.056 vf=23346.805 H=0.346 gn=20583.605 [worker 1] episodes_seen=3720 last_return=-379.5 (+1 eps) [worker 0] episodes_seen=3730 last_return=-237.9 (+1 eps) [worker 1] episodes_seen=3730 last_return=-331.0 (+1 eps) [worker 0] episodes_seen=3740 last_return=-188.3 (+1 eps) [worker 1] episodes_seen=3740 last_return=-250.6 (+1 eps) [worker 0] episodes_seen=3750 last_return=-163.6 (+1 eps) [worker 1] episodes_seen=3750 last_return=-146.5 (+1 eps) [worker 0] episodes_seen=3760 last_return=-220.0 (+1 eps) [worker 1] episodes_seen=3760 last_return=-156.1 (+1 eps) [worker 0] episodes_seen=3770 last_return=-132.2 (+1 eps) [worker 1] episodes_seen=3770 last_return=-185.5 (+1 eps) [worker 0] episodes_seen=3780 last_return=-101.4 (+1 eps) [worker 1] episodes_seen=3780 last_return=-267.9 (+1 eps) [worker 0] episodes_seen=3790 last_return=-173.8 (+1 eps) [worker 1] episodes_seen=3790 last_return=-343.0 (+1 eps) [worker 0] episodes_seen=3800 last_return=-305.5 (+1 eps) [worker 1] episodes_seen=3800 last_return=-121.1 (+1 eps) [worker 0] episodes_seen=3810 last_return=-163.8 (+1 eps) [worker 1] episodes_seen=3810 last_return=-219.6 (+1 eps) [worker 0] episodes_seen=3820 last_return=-170.4 (+1 eps) [worker 0] episodes_seen=3830 last_return=-365.8 (+1 eps) [worker 1] episodes_seen=3820 last_return=-114.4 (+1 eps) [worker 0] episodes_seen=3840 last_return=-295.6 (+1 eps) [worker 1] episodes_seen=3830 last_return=-336.9 (+1 eps) [worker 0] episodes_seen=3850 last_return=-340.5 (+1 eps) [worker 1] episodes_seen=3840 last_return=-186.7 (+1 eps) [worker 1] episodes_seen=3850 last_return=-205.3 (+1 eps) [worker 0] episodes_seen=3860 
last_return=-101.1 (+1 eps) [worker 1] episodes_seen=3860 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=3870 last_return=-431.3 (+1 eps) [worker 1] episodes_seen=3870 last_return=-412.1 (+1 eps) [worker 0] episodes_seen=3880 last_return=-382.2 (+1 eps) [worker 1] episodes_seen=3880 last_return=-230.4 (+1 eps) [worker 0] episodes_seen=3890 last_return=-212.7 (+1 eps) [worker 1] episodes_seen=3890 last_return=-362.1 (+1 eps) [worker 0] episodes_seen=3900 last_return=-329.1 (+1 eps) [worker 1] episodes_seen=3900 last_return=-224.5 (+1 eps) [worker 0] episodes_seen=3910 last_return=-399.3 (+1 eps) [worker 1] episodes_seen=3910 last_return=-364.8 (+1 eps) [worker 0] episodes_seen=3920 last_return=-284.4 (+1 eps) [worker 1] episodes_seen=3920 last_return=57.0 (+1 eps) [worker 0] episodes_seen=3930 last_return=-359.2 (+1 eps) [worker 1] episodes_seen=3930 last_return=-433.1 (+1 eps) [worker 0] episodes_seen=3940 last_return=-166.8 (+1 eps) [worker 1] episodes_seen=3940 last_return=-278.8 (+1 eps) [worker 0] episodes_seen=3950 last_return=61.2 (+1 eps) [worker 1] episodes_seen=3950 last_return=-421.2 (+1 eps) [worker 0] episodes_seen=3960 last_return=-112.5 (+1 eps) [worker 1] episodes_seen=3960 last_return=-329.1 (+1 eps) [worker 0] episodes_seen=3970 last_return=-195.9 (+1 eps) [worker 1] episodes_seen=3970 last_return=-242.7 (+1 eps) [worker 0] episodes_seen=3980 last_return=-384.8 (+1 eps) [worker 1] episodes_seen=3980 last_return=-393.7 (+1 eps) [worker 0] episodes_seen=3990 last_return=-325.1 (+1 eps) [worker 1] episodes_seen=3990 last_return=-296.8 (+1 eps) [worker 0] episodes_seen=4000 last_return=-385.4 (+1 eps) [worker 1] episodes_seen=4000 last_return=-203.7 (+1 eps) [worker 0] episodes_seen=4010 last_return=-350.7 (+1 eps) [worker 1] episodes_seen=4010 last_return=-328.7 (+1 eps) [worker 0] episodes_seen=4020 last_return=-132.3 (+1 eps) [worker 1] episodes_seen=4020 last_return=-419.1 (+1 eps) [worker 0] episodes_seen=4030 last_return=-160.7 (+1 eps) 
[worker 1] episodes_seen=4030 last_return=-29.9 (+1 eps) [worker 0] episodes_seen=4040 last_return=-349.7 (+1 eps) [worker 1] episodes_seen=4040 last_return=-290.3 (+1 eps) [worker 0] episodes_seen=4050 last_return=-304.9 (+1 eps) [worker 1] episodes_seen=4050 last_return=-139.7 (+1 eps) [worker 0] episodes_seen=4060 last_return=-348.1 (+1 eps) [worker 1] episodes_seen=4060 last_return=-336.2 (+1 eps) [worker 0] episodes_seen=4070 last_return=-112.3 (+1 eps) [worker 1] episodes_seen=4070 last_return=-279.1 (+1 eps) [worker 0] episodes_seen=4080 last_return=-123.9 (+1 eps) [worker 1] episodes_seen=4080 last_return=-76.8 (+1 eps) [worker 0] episodes_seen=4090 last_return=-103.2 (+1 eps) [worker 1] episodes_seen=4090 last_return=-65.3 (+1 eps) [worker 0] episodes_seen=4100 last_return=-197.7 (+1 eps) [worker 1] episodes_seen=4100 last_return=-298.0 (+1 eps) [worker 0] episodes_seen=4110 last_return=-170.1 (+1 eps) [worker 1] episodes_seen=4110 last_return=-150.7 (+1 eps) [worker 0] episodes_seen=4120 last_return=-226.6 (+1 eps) [worker 1] episodes_seen=4120 last_return=-162.7 (+1 eps) [worker 0] episodes_seen=4130 last_return=-165.6 (+1 eps) [worker 1] episodes_seen=4130 last_return=-91.7 (+1 eps) [worker 0] episodes_seen=4140 last_return=-143.6 (+1 eps) [worker 1] episodes_seen=4140 last_return=-87.6 (+1 eps) [worker 0] episodes_seen=4150 last_return=-80.8 (+1 eps) [worker 1] episodes_seen=4150 last_return=-163.6 (+1 eps) [A2C][sync] it=12001 steps= 720060 (+ 60) avg10=-178.08 loss=1711.825 pg=-0.060 vf=2633.678 H=0.591 gn=6450.577 [worker 0] episodes_seen=4160 last_return=-85.7 (+1 eps) [worker 1] episodes_seen=4160 last_return=-307.7 (+1 eps) [worker 0] episodes_seen=4170 last_return=-303.1 (+1 eps) [worker 1] episodes_seen=4170 last_return=-301.7 (+1 eps) [worker 0] episodes_seen=4180 last_return=-254.4 (+1 eps) [worker 1] episodes_seen=4180 last_return=-91.7 (+1 eps) [worker 0] episodes_seen=4190 last_return=-87.0 (+1 eps) [worker 1] episodes_seen=4190 
last_return=-52.1 (+1 eps) [worker 0] episodes_seen=4200 last_return=-82.2 (+1 eps) [worker 1] episodes_seen=4200 last_return=-83.9 (+1 eps) [worker 0] episodes_seen=4210 last_return=-19.1 (+1 eps) [worker 1] episodes_seen=4210 last_return=-155.3 (+1 eps) [worker 0] episodes_seen=4220 last_return=-91.3 (+1 eps) [worker 1] episodes_seen=4220 last_return=-54.7 (+1 eps) [worker 0] episodes_seen=4230 last_return=-51.5 (+1 eps) [worker 1] episodes_seen=4230 last_return=-113.2 (+1 eps) [worker 0] episodes_seen=4240 last_return=-89.6 (+1 eps) [worker 1] episodes_seen=4240 last_return=-64.4 (+1 eps) [worker 0] episodes_seen=4250 last_return=-189.9 (+1 eps) [worker 1] episodes_seen=4250 last_return=-102.5 (+1 eps) [worker 0] episodes_seen=4260 last_return=-128.5 (+1 eps) [worker 1] episodes_seen=4260 last_return=-126.6 (+1 eps) [worker 0] episodes_seen=4270 last_return=-93.9 (+1 eps) [worker 1] episodes_seen=4270 last_return=-78.3 (+1 eps) [worker 0] episodes_seen=4280 last_return=-286.7 (+1 eps) [worker 1] episodes_seen=4280 last_return=-320.7 (+1 eps) [worker 0] episodes_seen=4290 last_return=-128.8 (+1 eps) [worker 1] episodes_seen=4290 last_return=-277.4 (+1 eps) [worker 0] episodes_seen=4300 last_return=-113.3 (+1 eps) [worker 1] episodes_seen=4300 last_return=-497.4 (+1 eps) [worker 0] episodes_seen=4310 last_return=98.0 (+1 eps) [worker 1] episodes_seen=4310 last_return=-119.9 (+1 eps) [worker 0] episodes_seen=4320 last_return=-165.2 (+1 eps) [worker 1] episodes_seen=4320 last_return=-184.5 (+1 eps) [worker 0] episodes_seen=4330 last_return=-224.7 (+1 eps) [worker 1] episodes_seen=4330 last_return=-203.3 (+1 eps) [worker 0] episodes_seen=4340 last_return=-172.0 (+1 eps) [worker 1] episodes_seen=4340 last_return=-181.3 (+1 eps) [worker 0] episodes_seen=4350 last_return=-374.2 (+1 eps) [worker 1] episodes_seen=4350 last_return=-124.2 (+1 eps) [worker 0] episodes_seen=4360 last_return=-108.2 (+1 eps) [worker 1] episodes_seen=4360 last_return=-65.3 (+1 eps) [worker 0] 
episodes_seen=4370 last_return=-46.0 (+1 eps) [worker 1] episodes_seen=4370 last_return=-50.9 (+1 eps) [worker 0] episodes_seen=4380 last_return=-118.3 (+1 eps) [worker 1] episodes_seen=4380 last_return=-69.7 (+1 eps) [worker 0] episodes_seen=4390 last_return=-99.7 (+1 eps) [worker 1] episodes_seen=4390 last_return=-84.1 (+1 eps) [worker 0] episodes_seen=4400 last_return=-110.7 (+1 eps) [worker 1] episodes_seen=4400 last_return=-101.5 (+1 eps) [worker 0] episodes_seen=4410 last_return=-74.8 (+1 eps) [worker 1] episodes_seen=4410 last_return=-102.3 (+1 eps) [worker 0] episodes_seen=4420 last_return=-130.0 (+1 eps) [worker 1] episodes_seen=4420 last_return=-97.6 (+1 eps) [worker 0] episodes_seen=4430 last_return=-192.4 (+1 eps) [worker 1] episodes_seen=4430 last_return=-226.2 (+1 eps) [worker 0] episodes_seen=4440 last_return=-245.6 (+1 eps) [worker 1] episodes_seen=4440 last_return=65.3 (+1 eps) [worker 0] episodes_seen=4450 last_return=-123.9 (+1 eps) [worker 1] episodes_seen=4450 last_return=-99.5 (+1 eps) [worker 0] episodes_seen=4460 last_return=-83.1 (+1 eps) [worker 1] episodes_seen=4460 last_return=-99.5 (+1 eps) [worker 0] episodes_seen=4470 last_return=-381.2 (+1 eps) [worker 1] episodes_seen=4470 last_return=-151.0 (+1 eps) [worker 0] episodes_seen=4480 last_return=-314.4 (+1 eps) [worker 1] episodes_seen=4480 last_return=-5.0 (+1 eps) [worker 0] episodes_seen=4490 last_return=-425.4 (+1 eps) [worker 1] episodes_seen=4490 last_return=-266.6 (+1 eps) [worker 0] episodes_seen=4500 last_return=-140.7 (+1 eps) [worker 1] episodes_seen=4500 last_return=-315.4 (+1 eps) [worker 0] episodes_seen=4510 last_return=-172.9 (+1 eps) [worker 1] episodes_seen=4510 last_return=-182.3 (+1 eps) [worker 0] episodes_seen=4520 last_return=-145.0 (+1 eps) [worker 1] episodes_seen=4520 last_return=-320.6 (+1 eps) [worker 0] episodes_seen=4530 last_return=-256.9 (+1 eps) [worker 1] episodes_seen=4530 last_return=-357.6 (+1 eps) [worker 0] episodes_seen=4540 last_return=-318.0 (+1 
eps) [worker 1] episodes_seen=4540 last_return=-119.2 (+1 eps) [worker 1] episodes_seen=4550 last_return=-390.2 (+1 eps) [worker 0] episodes_seen=4550 last_return=-113.9 (+1 eps) [worker 1] episodes_seen=4560 last_return=-360.7 (+1 eps) [worker 0] episodes_seen=4560 last_return=-158.2 (+1 eps) [A2C][sync] it=13001 steps= 780060 (+ 60) avg10=-176.66 loss=1638.220 pg=0.020 vf=2520.317 H=0.585 gn=9398.021 [worker 0] episodes_seen=4570 last_return=-70.5 (+1 eps) [worker 1] episodes_seen=4570 last_return=-91.5 (+1 eps) [worker 0] episodes_seen=4580 last_return=-69.2 (+1 eps) [worker 1] episodes_seen=4580 last_return=-86.7 (+1 eps) [worker 0] episodes_seen=4590 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=4590 last_return=-68.3 (+1 eps) [worker 0] episodes_seen=4600 last_return=-123.4 (+1 eps) [worker 1] episodes_seen=4600 last_return=-111.7 (+1 eps) [worker 1] episodes_seen=4610 last_return=-71.4 (+1 eps) [worker 0] episodes_seen=4610 last_return=-114.2 (+1 eps) [worker 0] episodes_seen=4620 last_return=33.6 (+1 eps) [worker 1] episodes_seen=4620 last_return=38.0 (+1 eps) [worker 0] episodes_seen=4630 last_return=-89.9 (+1 eps) [worker 1] episodes_seen=4630 last_return=-165.3 (+1 eps) [worker 0] episodes_seen=4640 last_return=-46.2 (+1 eps) [worker 1] episodes_seen=4640 last_return=-92.2 (+1 eps) [worker 0] episodes_seen=4650 last_return=-77.9 (+1 eps) [worker 1] episodes_seen=4650 last_return=-97.4 (+1 eps) [worker 0] episodes_seen=4660 last_return=-110.1 (+1 eps) [worker 1] episodes_seen=4660 last_return=-137.3 (+1 eps) [worker 0] episodes_seen=4670 last_return=-91.6 (+1 eps) [worker 1] episodes_seen=4670 last_return=-64.7 (+1 eps) [worker 0] episodes_seen=4680 last_return=-109.9 (+1 eps) [worker 1] episodes_seen=4680 last_return=-113.0 (+1 eps) [worker 0] episodes_seen=4690 last_return=-327.4 (+1 eps) [worker 1] episodes_seen=4690 last_return=-332.0 (+1 eps) [worker 1] episodes_seen=4700 last_return=-102.8 (+1 eps) [worker 0] episodes_seen=4700 
last_return=-166.6 (+1 eps) [worker 1] episodes_seen=4710 last_return=-102.2 (+1 eps) [worker 0] episodes_seen=4710 last_return=-92.8 (+1 eps) [worker 1] episodes_seen=4720 last_return=-29.6 (+1 eps) [worker 0] episodes_seen=4720 last_return=-58.8 (+1 eps) [worker 1] episodes_seen=4730 last_return=-157.4 (+1 eps) [worker 0] episodes_seen=4730 last_return=-77.5 (+1 eps) [worker 1] episodes_seen=4740 last_return=-97.3 (+1 eps) [worker 0] episodes_seen=4740 last_return=-94.4 (+1 eps) [worker 1] episodes_seen=4750 last_return=-77.7 (+1 eps) [worker 0] episodes_seen=4750 last_return=-204.6 (+1 eps) [worker 0] episodes_seen=4760 last_return=-192.6 (+1 eps) [worker 1] episodes_seen=4760 last_return=-303.2 (+1 eps) [worker 0] episodes_seen=4770 last_return=-72.9 (+1 eps) [worker 1] episodes_seen=4770 last_return=-90.1 (+1 eps) [worker 0] episodes_seen=4780 last_return=-119.0 (+1 eps) [worker 1] episodes_seen=4780 last_return=-77.0 (+1 eps) [worker 0] episodes_seen=4790 last_return=-80.7 (+1 eps) [worker 1] episodes_seen=4790 last_return=-57.5 (+1 eps) [worker 0] episodes_seen=4800 last_return=-149.6 (+1 eps) [worker 1] episodes_seen=4800 last_return=-163.1 (+1 eps) [worker 0] episodes_seen=4810 last_return=-75.5 (+1 eps) [worker 1] episodes_seen=4810 last_return=-129.6 (+1 eps) [worker 0] episodes_seen=4820 last_return=-112.8 (+1 eps) [worker 1] episodes_seen=4820 last_return=-163.3 (+1 eps) [worker 0] episodes_seen=4830 last_return=-75.9 (+1 eps) [worker 1] episodes_seen=4830 last_return=-129.6 (+1 eps) [worker 0] episodes_seen=4840 last_return=-64.4 (+1 eps) [worker 1] episodes_seen=4840 last_return=-96.9 (+1 eps) [worker 0] episodes_seen=4850 last_return=-74.0 (+1 eps) [worker 1] episodes_seen=4850 last_return=-287.9 (+1 eps) [worker 0] episodes_seen=4860 last_return=-328.9 (+1 eps) [worker 1] episodes_seen=4860 last_return=-220.0 (+1 eps) [worker 0] episodes_seen=4870 last_return=-213.5 (+1 eps) [worker 1] episodes_seen=4870 last_return=-38.8 (+1 eps) [worker 0] 
episodes_seen=4880 last_return=-73.7 (+1 eps) [worker 1] episodes_seen=4880 last_return=-90.1 (+1 eps) [worker 0] episodes_seen=4890 last_return=-68.8 (+1 eps) [worker 1] episodes_seen=4890 last_return=-68.5 (+1 eps) [worker 0] episodes_seen=4900 last_return=-170.9 (+1 eps) [A2C][sync] it=14001 steps= 840060 (+ 60) avg10=-110.94 loss=84.783 pg=0.015 vf=130.427 H=0.998 gn=1435.815 [worker 1] episodes_seen=4900 last_return=-90.4 (+1 eps) [worker 0] episodes_seen=4910 last_return=-29.0 (+1 eps) [worker 1] episodes_seen=4910 last_return=-117.4 (+1 eps) [worker 0] episodes_seen=4920 last_return=70.7 (+1 eps) [worker 1] episodes_seen=4920 last_return=-63.7 (+1 eps) [worker 0] episodes_seen=4930 last_return=-118.7 (+1 eps) [worker 1] episodes_seen=4930 last_return=-46.8 (+1 eps) [worker 0] episodes_seen=4940 last_return=-154.9 (+1 eps) [worker 1] episodes_seen=4940 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=4950 last_return=-97.5 (+1 eps) [worker 1] episodes_seen=4950 last_return=-138.5 (+1 eps) [worker 0] episodes_seen=4960 last_return=-58.5 (+1 eps) [worker 1] episodes_seen=4960 last_return=-101.2 (+1 eps) [worker 0] episodes_seen=4970 last_return=-84.1 (+1 eps) [worker 1] episodes_seen=4970 last_return=-89.4 (+1 eps) [worker 0] episodes_seen=4980 last_return=-146.6 (+1 eps) [worker 1] episodes_seen=4980 last_return=-67.8 (+1 eps) [worker 0] episodes_seen=4990 last_return=-39.0 (+1 eps) [worker 1] episodes_seen=4990 last_return=-42.5 (+1 eps) [worker 0] episodes_seen=5000 last_return=-110.6 (+1 eps) [worker 1] episodes_seen=5000 last_return=-111.7 (+1 eps) [worker 0] episodes_seen=5010 last_return=-264.6 (+1 eps) [worker 1] episodes_seen=5010 last_return=-66.2 (+1 eps) [worker 0] episodes_seen=5020 last_return=-77.4 (+1 eps) [worker 1] episodes_seen=5020 last_return=3.2 (+1 eps) [worker 0] episodes_seen=5030 last_return=-76.5 (+1 eps) [worker 1] episodes_seen=5030 last_return=-76.6 (+1 eps) [worker 0] episodes_seen=5040 last_return=-70.6 (+1 eps) [worker 1] 
episodes_seen=5040 last_return=-50.2 (+1 eps) [worker 0] episodes_seen=5050 last_return=-81.3 (+1 eps) [worker 1] episodes_seen=5050 last_return=-100.4 (+1 eps) [worker 1] episodes_seen=5060 last_return=-155.7 (+1 eps) [worker 0] episodes_seen=5060 last_return=-229.0 (+1 eps) [worker 1] episodes_seen=5070 last_return=-298.4 (+1 eps) [worker 0] episodes_seen=5070 last_return=-71.9 (+1 eps) [worker 1] episodes_seen=5080 last_return=-51.6 (+1 eps) [worker 0] episodes_seen=5080 last_return=-85.0 (+1 eps) [worker 1] episodes_seen=5090 last_return=-78.8 (+1 eps) [worker 0] episodes_seen=5090 last_return=-98.5 (+1 eps) [worker 1] episodes_seen=5100 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=5100 last_return=-106.4 (+1 eps) [A2C][sync] it=15001 steps= 900060 (+ 60) avg10=-106.34 loss=50.366 pg=0.141 vf=77.280 H=0.685 gn=1773.043 [worker 1] episodes_seen=5110 last_return=-95.3 (+1 eps) [worker 0] episodes_seen=5110 last_return=-85.2 (+1 eps) [worker 1] episodes_seen=5120 last_return=-73.2 (+1 eps) [worker 0] episodes_seen=5120 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=5130 last_return=-137.0 (+1 eps) [worker 1] episodes_seen=5140 last_return=-113.1 (+1 eps) [worker 0] episodes_seen=5130 last_return=-93.4 (+1 eps) [worker 1] episodes_seen=5150 last_return=-189.8 (+1 eps) [worker 0] episodes_seen=5140 last_return=-108.2 (+1 eps) [worker 1] episodes_seen=5160 last_return=-86.9 (+1 eps) [worker 0] episodes_seen=5150 last_return=-1.9 (+1 eps) [worker 1] episodes_seen=5170 last_return=-129.0 (+1 eps) [worker 0] episodes_seen=5160 last_return=-381.9 (+1 eps) [worker 1] episodes_seen=5180 last_return=-194.1 (+1 eps) [worker 0] episodes_seen=5170 last_return=-120.9 (+1 eps) [worker 1] episodes_seen=5190 last_return=-94.8 (+1 eps) [worker 1] episodes_seen=5200 last_return=-74.7 (+1 eps) [worker 0] episodes_seen=5180 last_return=-0.5 (+1 eps) [worker 1] episodes_seen=5210 last_return=-100.2 (+1 eps) [worker 0] episodes_seen=5190 last_return=-149.0 (+1 eps) [worker 
1] episodes_seen=5220 last_return=-109.7 (+1 eps) [worker 0] episodes_seen=5200 last_return=-103.4 (+1 eps) [worker 0] episodes_seen=5210 last_return=-55.3 (+1 eps) [worker 1] episodes_seen=5230 last_return=-21.9 (+1 eps) [worker 0] episodes_seen=5220 last_return=-62.9 (+1 eps) [worker 1] episodes_seen=5240 last_return=-63.8 (+1 eps) [worker 0] episodes_seen=5230 last_return=-92.1 (+1 eps) [worker 1] episodes_seen=5250 last_return=-82.3 (+1 eps) [worker 0] episodes_seen=5240 last_return=-75.3 (+1 eps) [worker 1] episodes_seen=5260 last_return=-56.2 (+1 eps) [worker 0] episodes_seen=5250 last_return=-4.4 (+1 eps) [worker 1] episodes_seen=5270 last_return=-76.1 (+1 eps) [worker 0] episodes_seen=5260 last_return=60.6 (+1 eps) [worker 1] episodes_seen=5280 last_return=-92.7 (+1 eps) [worker 0] episodes_seen=5270 last_return=-77.7 (+1 eps) [A2C][sync] it=16001 steps= 960060 (+ 60) avg10= -64.51 loss=73.684 pg=-0.142 vf=113.588 H=0.633 gn=552.003 [worker 1] episodes_seen=5290 last_return=-87.4 (+1 eps) [worker 0] episodes_seen=5280 last_return=-68.5 (+1 eps) [worker 1] episodes_seen=5300 last_return=-51.5 (+1 eps) [worker 0] episodes_seen=5290 last_return=-62.5 (+1 eps) [worker 1] episodes_seen=5310 last_return=-107.0 (+1 eps) [worker 1] episodes_seen=5320 last_return=-79.6 (+1 eps) [worker 0] episodes_seen=5300 last_return=-98.7 (+1 eps) [worker 1] episodes_seen=5330 last_return=-104.8 (+1 eps) [worker 0] episodes_seen=5310 last_return=-68.2 (+1 eps) [worker 0] episodes_seen=5320 last_return=-102.6 (+1 eps) [worker 1] episodes_seen=5340 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=5350 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=5330 last_return=-89.3 (+1 eps) [worker 1] episodes_seen=5360 last_return=-117.5 (+1 eps) [worker 0] episodes_seen=5340 last_return=-65.7 (+1 eps) [worker 1] episodes_seen=5370 last_return=-24.9 (+1 eps) [worker 0] episodes_seen=5350 last_return=-57.8 (+1 eps) [worker 1] episodes_seen=5380 last_return=-0.8 (+1 eps) [worker 0] 
episodes_seen=5360 last_return=-107.6 (+1 eps) [worker 1] episodes_seen=5390 last_return=-79.3 (+1 eps) [worker 0] episodes_seen=5370 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=5400 last_return=-54.9 (+1 eps) [worker 0] episodes_seen=5380 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=5410 last_return=-93.4 (+1 eps) [worker 0] episodes_seen=5390 last_return=-99.3 (+1 eps) [worker 1] episodes_seen=5420 last_return=-72.6 (+1 eps) [worker 0] episodes_seen=5400 last_return=-70.9 (+1 eps) [worker 1] episodes_seen=5430 last_return=-78.5 (+1 eps) [worker 0] episodes_seen=5410 last_return=-92.4 (+1 eps) [worker 1] episodes_seen=5440 last_return=-94.9 (+1 eps) [worker 0] episodes_seen=5420 last_return=-72.0 (+1 eps) [worker 1] episodes_seen=5450 last_return=-65.3 (+1 eps) [worker 0] episodes_seen=5430 last_return=-80.4 (+1 eps) [worker 1] episodes_seen=5460 last_return=-81.1 (+1 eps) [worker 0] episodes_seen=5440 last_return=-94.6 (+1 eps) [worker 1] episodes_seen=5470 last_return=-80.2 (+1 eps) [worker 0] episodes_seen=5450 last_return=-93.7 (+1 eps) [worker 1] episodes_seen=5480 last_return=-80.3 (+1 eps) [worker 0] episodes_seen=5460 last_return=-59.3 (+1 eps) [worker 1] episodes_seen=5490 last_return=-101.2 (+1 eps) [worker 0] episodes_seen=5470 last_return=-85.9 (+1 eps) [worker 1] episodes_seen=5500 last_return=-80.0 (+1 eps) [worker 0] episodes_seen=5480 last_return=-94.2 (+1 eps) [worker 1] episodes_seen=5510 last_return=-110.4 (+1 eps) [worker 0] episodes_seen=5490 last_return=-98.4 (+1 eps) [worker 1] episodes_seen=5520 last_return=-70.0 (+1 eps) [worker 0] episodes_seen=5500 last_return=-62.6 (+1 eps) [worker 1] episodes_seen=5530 last_return=-35.2 (+1 eps) [worker 0] episodes_seen=5510 last_return=-63.4 (+1 eps) [worker 0] episodes_seen=5520 last_return=-108.3 (+1 eps) [worker 1] episodes_seen=5540 last_return=-76.9 (+1 eps) [A2C][sync] it=17001 steps= 1020060 (+ 60) avg10= -97.32 loss=107.681 pg=-0.012 vf=165.695 H=0.805 gn=578.819 [worker 1] 
episodes_seen=5550 last_return=-146.4 (+1 eps) [worker 0] episodes_seen=5530 last_return=60.7 (+1 eps) [worker 1] episodes_seen=5560 last_return=-34.3 (+1 eps) [worker 0] episodes_seen=5540 last_return=-63.7 (+1 eps) [worker 1] episodes_seen=5570 last_return=-104.2 (+1 eps) [worker 0] episodes_seen=5550 last_return=-95.7 (+1 eps) [worker 1] episodes_seen=5580 last_return=-135.4 (+1 eps) [worker 0] episodes_seen=5560 last_return=-92.0 (+1 eps) [worker 1] episodes_seen=5590 last_return=-103.7 (+1 eps) [worker 0] episodes_seen=5570 last_return=-84.4 (+1 eps) [worker 1] episodes_seen=5600 last_return=-108.6 (+1 eps) [worker 0] episodes_seen=5580 last_return=-98.1 (+1 eps) [worker 1] episodes_seen=5610 last_return=-95.3 (+1 eps) [worker 0] episodes_seen=5590 last_return=-92.2 (+1 eps) [worker 1] episodes_seen=5620 last_return=-59.8 (+1 eps) [worker 0] episodes_seen=5600 last_return=-86.1 (+1 eps) [worker 1] episodes_seen=5630 last_return=-95.5 (+1 eps) [worker 0] episodes_seen=5610 last_return=-79.9 (+1 eps) [worker 1] episodes_seen=5640 last_return=-44.5 (+1 eps) [worker 0] episodes_seen=5620 last_return=-3.9 (+1 eps) [worker 1] episodes_seen=5650 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=5630 last_return=27.4 (+1 eps) [worker 1] episodes_seen=5660 last_return=-116.7 (+1 eps) [worker 0] episodes_seen=5640 last_return=-73.5 (+1 eps) [worker 1] episodes_seen=5670 last_return=-88.2 (+1 eps) [worker 0] episodes_seen=5650 last_return=-101.7 (+1 eps) [worker 1] episodes_seen=5680 last_return=-147.4 (+1 eps) [worker 0] episodes_seen=5660 last_return=-45.5 (+1 eps) [worker 1] episodes_seen=5690 last_return=-54.1 (+1 eps) [worker 0] episodes_seen=5670 last_return=-3.0 (+1 eps) [worker 1] episodes_seen=5700 last_return=-8.4 (+1 eps) [worker 1] episodes_seen=5710 last_return=-239.2 (+1 eps) [worker 0] episodes_seen=5680 last_return=-111.7 (+1 eps) [worker 1] episodes_seen=5720 last_return=-1.1 (+1 eps) [worker 0] episodes_seen=5690 last_return=-181.6 (+1 eps) [worker 
1] episodes_seen=5730 last_return=24.6 (+1 eps) [worker 0] episodes_seen=5700 last_return=-67.4 (+1 eps) [worker 1] episodes_seen=5740 last_return=-10.5 (+1 eps) [worker 0] episodes_seen=5710 last_return=-38.9 (+1 eps) [worker 1] episodes_seen=5750 last_return=-72.2 (+1 eps) [worker 0] episodes_seen=5720 last_return=-64.4 (+1 eps) [worker 1] episodes_seen=5760 last_return=-67.4 (+1 eps) [worker 0] episodes_seen=5730 last_return=15.3 (+1 eps) [worker 1] episodes_seen=5770 last_return=-71.1 (+1 eps) [worker 1] episodes_seen=5780 last_return=-111.7 (+1 eps) [worker 0] episodes_seen=5740 last_return=-100.0 (+1 eps) [A2C][sync] it=18001 steps= 1080060 (+ 60) avg10= -89.83 loss=50.931 pg=-0.023 vf=78.403 H=0.797 gn=634.193 [worker 0] episodes_seen=5750 last_return=-82.4 (+1 eps) [worker 1] episodes_seen=5790 last_return=-59.0 (+1 eps) [worker 0] episodes_seen=5760 last_return=-68.9 (+1 eps) [worker 1] episodes_seen=5800 last_return=-68.8 (+1 eps) [worker 0] episodes_seen=5770 last_return=-28.3 (+1 eps) [worker 0] episodes_seen=5780 last_return=19.4 (+1 eps) [worker 1] episodes_seen=5810 last_return=-5.7 (+1 eps) [worker 0] episodes_seen=5790 last_return=-44.8 (+1 eps) [worker 1] episodes_seen=5820 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=5800 last_return=-60.2 (+1 eps) [worker 1] episodes_seen=5830 last_return=-71.9 (+1 eps) [worker 0] episodes_seen=5810 last_return=-84.4 (+1 eps) [worker 1] episodes_seen=5840 last_return=-100.3 (+1 eps) [worker 0] episodes_seen=5820 last_return=-105.7 (+1 eps) [worker 1] episodes_seen=5850 last_return=-24.3 (+1 eps) [worker 0] episodes_seen=5830 last_return=-44.1 (+1 eps) [worker 1] episodes_seen=5860 last_return=-88.3 (+1 eps) [worker 0] episodes_seen=5840 last_return=-91.7 (+1 eps) [worker 1] episodes_seen=5870 last_return=-105.8 (+1 eps) [worker 0] episodes_seen=5850 last_return=-99.6 (+1 eps) [worker 1] episodes_seen=5880 last_return=-79.9 (+1 eps) [worker 0] episodes_seen=5860 last_return=-115.7 (+1 eps) [worker 1] 
episodes_seen=5890 last_return=-63.7 (+1 eps) [worker 0] episodes_seen=5870 last_return=-69.1 (+1 eps) [worker 1] episodes_seen=5900 last_return=-99.0 (+1 eps) [worker 0] episodes_seen=5880 last_return=-112.2 (+1 eps) [worker 1] episodes_seen=5910 last_return=-101.6 (+1 eps) [worker 0] episodes_seen=5890 last_return=-90.8 (+1 eps) [worker 1] episodes_seen=5920 last_return=-94.8 (+1 eps) [worker 0] episodes_seen=5900 last_return=-80.9 (+1 eps) [worker 1] episodes_seen=5930 last_return=-126.9 (+1 eps) [worker 0] episodes_seen=5910 last_return=-88.3 (+1 eps) [worker 1] episodes_seen=5940 last_return=-102.1 (+1 eps) [worker 0] episodes_seen=5920 last_return=-111.4 (+1 eps) [worker 1] episodes_seen=5950 last_return=-100.3 (+1 eps) [worker 0] episodes_seen=5930 last_return=-102.6 (+1 eps) [worker 1] episodes_seen=5960 last_return=-77.0 (+1 eps) [worker 0] episodes_seen=5940 last_return=-92.8 (+1 eps) [worker 1] episodes_seen=5970 last_return=-103.4 (+1 eps) [worker 0] episodes_seen=5950 last_return=-57.0 (+1 eps) [worker 1] episodes_seen=5980 last_return=-84.9 (+1 eps) [worker 0] episodes_seen=5960 last_return=-87.7 (+1 eps) [worker 1] episodes_seen=5990 last_return=-72.8 (+1 eps) [worker 0] episodes_seen=5970 last_return=62.1 (+1 eps) [worker 1] episodes_seen=6000 last_return=-86.7 (+1 eps) [worker 0] episodes_seen=5980 last_return=-81.8 (+1 eps) [worker 1] episodes_seen=6010 last_return=-93.4 (+1 eps) [worker 0] episodes_seen=5990 last_return=-59.2 (+1 eps) [worker 1] episodes_seen=6020 last_return=-77.2 (+1 eps) [worker 0] episodes_seen=6000 last_return=-92.9 (+1 eps) [worker 1] episodes_seen=6030 last_return=-106.3 (+1 eps) [worker 0] episodes_seen=6010 last_return=-55.3 (+1 eps) [worker 1] episodes_seen=6040 last_return=-93.4 (+1 eps) [worker 0] episodes_seen=6020 last_return=-89.2 (+1 eps) [worker 1] episodes_seen=6050 last_return=-95.4 (+1 eps) [worker 0] episodes_seen=6030 last_return=-63.2 (+1 eps) [worker 1] episodes_seen=6060 last_return=-97.2 (+1 eps) [worker 
0] episodes_seen=6040 last_return=-96.7 (+1 eps) [worker 1] episodes_seen=6070 last_return=-42.9 (+1 eps) [worker 0] episodes_seen=6050 last_return=-121.4 (+1 eps) [worker 1] episodes_seen=6080 last_return=-68.0 (+1 eps) [worker 0] episodes_seen=6060 last_return=-94.3 (+1 eps) [worker 1] episodes_seen=6090 last_return=-89.5 (+1 eps) [worker 0] episodes_seen=6070 last_return=-96.4 (+1 eps) [worker 1] episodes_seen=6100 last_return=-99.9 (+1 eps) [worker 0] episodes_seen=6080 last_return=-86.2 (+1 eps) [worker 1] episodes_seen=6110 last_return=-93.7 (+1 eps) [worker 0] episodes_seen=6090 last_return=-92.9 (+1 eps) [worker 1] episodes_seen=6120 last_return=-97.7 (+1 eps) [worker 0] episodes_seen=6100 last_return=-80.1 (+1 eps) [worker 1] episodes_seen=6130 last_return=-67.6 (+1 eps) [A2C][sync] it=19001 steps= 1140060 (+ 60) avg10= -87.67 loss=33.375 pg=-0.070 vf=51.463 H=0.610 gn=377.883 [worker 0] episodes_seen=6110 last_return=-108.1 (+1 eps) [worker 1] episodes_seen=6140 last_return=-96.1 (+1 eps) [worker 0] episodes_seen=6120 last_return=-76.7 (+1 eps) [worker 1] episodes_seen=6150 last_return=-107.0 (+1 eps) [worker 0] episodes_seen=6130 last_return=-82.6 (+1 eps) [worker 1] episodes_seen=6160 last_return=-94.1 (+1 eps) [worker 0] episodes_seen=6140 last_return=-96.2 (+1 eps) [worker 1] episodes_seen=6170 last_return=-87.3 (+1 eps) [worker 0] episodes_seen=6150 last_return=-104.3 (+1 eps) [worker 1] episodes_seen=6180 last_return=-95.4 (+1 eps) [worker 0] episodes_seen=6160 last_return=-93.4 (+1 eps) [worker 1] episodes_seen=6190 last_return=-122.2 (+1 eps) [worker 0] episodes_seen=6170 last_return=-114.3 (+1 eps) [worker 1] episodes_seen=6200 last_return=-72.7 (+1 eps) [worker 0] episodes_seen=6180 last_return=-82.5 (+1 eps) [worker 1] episodes_seen=6210 last_return=-98.1 (+1 eps) [worker 0] episodes_seen=6190 last_return=-68.8 (+1 eps) [worker 1] episodes_seen=6220 last_return=-35.0 (+1 eps) [worker 0] episodes_seen=6200 last_return=-77.2 (+1 eps) [worker 1] 
episodes_seen=6230 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=6210 last_return=-9.3 (+1 eps) [worker 0] episodes_seen=6220 last_return=-58.5 (+1 eps) [worker 1] episodes_seen=6240 last_return=-99.8 (+1 eps) [worker 0] episodes_seen=6230 last_return=-59.9 (+1 eps) [worker 0] episodes_seen=6240 last_return=-70.0 (+1 eps) [worker 1] episodes_seen=6250 last_return=-3.4 (+1 eps) [worker 0] episodes_seen=6250 last_return=-62.3 (+1 eps) [worker 1] episodes_seen=6260 last_return=-60.0 (+1 eps) [worker 0] episodes_seen=6260 last_return=-86.8 (+1 eps) [worker 1] episodes_seen=6270 last_return=-48.3 (+1 eps) [worker 0] episodes_seen=6270 last_return=-103.4 (+1 eps) [worker 1] episodes_seen=6280 last_return=81.6 (+1 eps) [worker 0] episodes_seen=6280 last_return=-10.9 (+1 eps) [worker 1] episodes_seen=6290 last_return=-89.7 (+1 eps) [worker 0] episodes_seen=6290 last_return=-85.2 (+1 eps) [worker 1] episodes_seen=6300 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=6300 last_return=-54.7 (+1 eps) [worker 1] episodes_seen=6310 last_return=-67.9 (+1 eps) [worker 0] episodes_seen=6310 last_return=-105.5 (+1 eps) [worker 1] episodes_seen=6320 last_return=-101.8 (+1 eps) [worker 1] episodes_seen=6330 last_return=-35.5 (+1 eps) [worker 0] episodes_seen=6320 last_return=-43.9 (+1 eps) [worker 1] episodes_seen=6340 last_return=-79.7 (+1 eps) [worker 0] episodes_seen=6330 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=6350 last_return=-58.0 (+1 eps) [worker 0] episodes_seen=6340 last_return=-8.3 (+1 eps) [worker 1] episodes_seen=6360 last_return=-183.4 (+1 eps) [A2C][sync] it=20000 steps= 1200000 (+ 60) avg10= -99.19 loss=101.858 pg=-0.017 vf=156.743 H=0.821 gn=1713.591 [Checkpoint] Saved self-describing checkpoint → part2_artifacts/checkpoints/a2c_run17_seed1227.pth [A2C][sync] done: steps=1200000 time=1135.5s avg10=-99.19
[Run run17_seed1227] checkpoint: part2_artifacts/checkpoints/a2c_run17_seed1227.pth [Run run17_seed1227] training plot (tail 500): part2_artifacts/train_curve_run17_seed1227.png [Run run17_seed1227] training plot (full): part2_artifacts/train_curve_full_run17_seed1227.png [Run run17_seed1227] per-worker plot: part2_artifacts/train_curve_workers_run17_seed1227.png [Run run17_seed1227] workers-average plot: part2_artifacts/train_curve_workers_avg_run17_seed1227.png
[Eval run17_seed1227] mean=-177.68 std=158.48 min=-498.91 max=-56.82 [Eval run17_seed1227] CSV: part2_artifacts/eval10_run17_seed1227.csv [Eval run17_seed1227] plot: part2_artifacts/eval10_run17_seed1227.png [Best] ep=5 return=-56.82 seed=1232 [Video run17_seed1227] episode return=-56.82 [Video run17_seed1227] saved under: part2_artifacts/videos run17_seed1227 | mean=-177.7±158.5 | best_ep=5, best_ret=-56.8
Run#18
# Run 18: train, evaluate the saved checkpoint, then record the best evaluation episode.
run_id = f"run18_seed{SEED}"

# Hyperparameters for this run, gathered in one mapping so they can be compared
# against other runs at a glance. They are forwarded to train_once unchanged.
# NOTE(review): the captured output for this run reports H=0.000 together with a
# very large value loss and gradient norm from the second log line onward, so this
# configuration appears to diverge early — worth revisiting before reuse.
run18_hparams = dict(
    n_workers=3,
    total_env_steps=1_000_000,
    T=15,
    gamma=0.997,
    entropy_coef=0.02,
    value_coef=0.55,
    max_grad_norm=0.5,
    lr=2.5e-4,
    log_every=50_000,
)
model, logs, paths = train_once(run_id=run_id, **run18_hparams)

# Both post-training steps read the checkpoint written by the training call.
ckpt_path = paths.ckpt_path
metrics, _ = evaluate_10(run_id, ckpt_path)
video_dir, best_idx, best_ret = record_best_from_eval(run_id, ckpt_path)

# One-line summary: evaluation mean ± std plus the best episode's index and return.
print(f"{run_id} | mean={metrics['mean']:.1f}±{metrics['std']:.1f} | best_ep={best_idx}, best_ret={best_ret:.1f}")
[Run run18_seed1227] starting training… [A2C][sync] start: workers=3, T=15, target_steps=1000000, mp=fork [A2C][sync] it= 1 steps= 45 (+ 45) avg10= nan loss=22.239 pg=0.000 vf=40.485 H=1.386 gn=12.359 [worker 2] episodes_seen=10 last_return=-100.6 (+1 eps) [worker 0] episodes_seen=10 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=10 last_return=-169.5 (+1 eps) [worker 2] episodes_seen=20 last_return=-134.9 (+1 eps) [worker 1] episodes_seen=20 last_return=-166.2 (+1 eps) [worker 0] episodes_seen=20 last_return=-157.9 (+1 eps) [worker 2] episodes_seen=30 last_return=-195.0 (+1 eps) [worker 1] episodes_seen=30 last_return=-140.7 (+1 eps) [worker 0] episodes_seen=30 last_return=-185.2 (+1 eps) [worker 2] episodes_seen=40 last_return=-155.0 (+1 eps) [worker 1] episodes_seen=40 last_return=-177.2 (+1 eps) [worker 0] episodes_seen=40 last_return=-167.5 (+1 eps) [worker 2] episodes_seen=50 last_return=-176.6 (+1 eps) [worker 1] episodes_seen=50 last_return=-126.2 (+1 eps) [worker 0] episodes_seen=50 last_return=-183.4 (+1 eps) [worker 2] episodes_seen=60 last_return=-130.4 (+1 eps) [worker 1] episodes_seen=60 last_return=-160.5 (+1 eps) [worker 0] episodes_seen=60 last_return=-172.9 (+1 eps) [worker 2] episodes_seen=70 last_return=-198.0 (+1 eps) [worker 1] episodes_seen=70 last_return=-233.9 (+1 eps) [worker 0] episodes_seen=70 last_return=-246.3 (+1 eps) [worker 1] episodes_seen=80 last_return=-182.6 (+1 eps) [worker 2] episodes_seen=80 last_return=-216.1 (+1 eps) [worker 0] episodes_seen=80 last_return=-147.4 (+1 eps) [worker 0] episodes_seen=90 last_return=-222.8 (+1 eps) [worker 2] episodes_seen=90 last_return=-152.8 (+1 eps) [worker 1] episodes_seen=90 last_return=-192.9 (+1 eps) [worker 0] episodes_seen=100 last_return=-175.2 (+1 eps) [worker 1] episodes_seen=100 last_return=-173.7 (+1 eps) [worker 2] episodes_seen=100 last_return=-109.1 (+1 eps) [worker 0] episodes_seen=110 last_return=-198.8 (+1 eps) [worker 2] episodes_seen=110 last_return=-180.2 (+1 eps) 
[worker 1] episodes_seen=110 last_return=-203.4 (+1 eps) [worker 2] episodes_seen=120 last_return=-139.2 (+1 eps) [worker 0] episodes_seen=120 last_return=-151.5 (+1 eps) [worker 1] episodes_seen=120 last_return=-146.3 (+1 eps) [worker 2] episodes_seen=130 last_return=-173.2 (+1 eps) [worker 0] episodes_seen=130 last_return=-173.5 (+1 eps) [worker 2] episodes_seen=140 last_return=-177.2 (+1 eps) [worker 1] episodes_seen=130 last_return=-153.2 (+1 eps) [worker 0] episodes_seen=140 last_return=-122.8 (+1 eps) [A2C][sync] it= 1113 steps= 50085 (+ 45) avg10=-164.76 loss=183648.922 pg=-0.000 vf=333907.125 H=0.000 gn=126813.680 [worker 0] episodes_seen=150 last_return=-148.1 (+1 eps) [worker 2] episodes_seen=150 last_return=-105.9 (+1 eps) [worker 1] episodes_seen=140 last_return=-168.9 (+1 eps) [worker 0] episodes_seen=160 last_return=-118.1 (+1 eps) [worker 2] episodes_seen=160 last_return=-204.2 (+1 eps) [worker 1] episodes_seen=150 last_return=-215.9 (+1 eps) [worker 0] episodes_seen=170 last_return=-161.1 (+1 eps) [worker 2] episodes_seen=170 last_return=-187.1 (+1 eps) [worker 1] episodes_seen=160 last_return=-135.1 (+1 eps) [worker 0] episodes_seen=180 last_return=-108.2 (+1 eps) [worker 1] episodes_seen=170 last_return=-146.7 (+1 eps) [worker 2] episodes_seen=180 last_return=-221.7 (+1 eps) [worker 0] episodes_seen=190 last_return=-169.6 (+1 eps) [worker 1] episodes_seen=180 last_return=-138.9 (+1 eps) [worker 2] episodes_seen=190 last_return=-220.3 (+1 eps) [worker 0] episodes_seen=200 last_return=-141.6 (+1 eps) [worker 1] episodes_seen=190 last_return=-166.5 (+1 eps) [worker 2] episodes_seen=200 last_return=-209.3 (+1 eps) [worker 0] episodes_seen=210 last_return=-207.8 (+1 eps) [worker 1] episodes_seen=200 last_return=-122.5 (+1 eps) [worker 2] episodes_seen=210 last_return=-148.9 (+1 eps) [worker 0] episodes_seen=220 last_return=-181.5 (+1 eps) [worker 2] episodes_seen=220 last_return=-131.1 (+1 eps) [worker 1] episodes_seen=210 last_return=-109.6 (+1 eps) 
[worker 0] episodes_seen=230 last_return=-160.1 (+1 eps) [worker 2] episodes_seen=230 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=220 last_return=-188.0 (+1 eps) [worker 0] episodes_seen=240 last_return=-197.5 (+1 eps) [worker 2] episodes_seen=240 last_return=-119.8 (+1 eps) [worker 0] episodes_seen=250 last_return=-131.8 (+1 eps) [worker 1] episodes_seen=230 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=250 last_return=-202.5 (+1 eps) [worker 0] episodes_seen=260 last_return=-206.2 (+1 eps) [worker 1] episodes_seen=240 last_return=-140.2 (+1 eps) [worker 2] episodes_seen=260 last_return=-192.0 (+1 eps) [worker 1] episodes_seen=250 last_return=-187.4 (+1 eps) [worker 0] episodes_seen=270 last_return=-152.2 (+1 eps) [worker 2] episodes_seen=270 last_return=-144.8 (+1 eps) [worker 1] episodes_seen=260 last_return=-187.0 (+1 eps) [worker 0] episodes_seen=280 last_return=-173.3 (+1 eps) [worker 2] episodes_seen=280 last_return=-119.1 (+1 eps) [worker 1] episodes_seen=270 last_return=-197.1 (+1 eps) [A2C][sync] it= 2225 steps= 100125 (+ 45) avg10=-186.25 loss=116148.102 pg=-0.000 vf=211178.359 H=0.000 gn=760321.688 [worker 0] episodes_seen=290 last_return=-134.3 (+1 eps) [worker 2] episodes_seen=290 last_return=-249.6 (+1 eps) [worker 1] episodes_seen=280 last_return=-137.8 (+1 eps) [worker 2] episodes_seen=300 last_return=-141.8 (+1 eps) [worker 0] episodes_seen=300 last_return=-171.6 (+1 eps) [worker 1] episodes_seen=290 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=310 last_return=-121.2 (+1 eps) [worker 0] episodes_seen=310 last_return=-220.7 (+1 eps) [worker 1] episodes_seen=300 last_return=-120.4 (+1 eps) [worker 2] episodes_seen=320 last_return=-206.2 (+1 eps) [worker 0] episodes_seen=320 last_return=-165.4 (+1 eps) [worker 1] episodes_seen=310 last_return=-156.3 (+1 eps) [worker 2] episodes_seen=330 last_return=-222.0 (+1 eps) [worker 0] episodes_seen=330 last_return=-172.6 (+1 eps) [worker 1] episodes_seen=320 last_return=-178.0 (+1 eps) 
[worker 0] episodes_seen=340 last_return=-232.2 (+1 eps) [worker 2] episodes_seen=340 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=330 last_return=-142.9 (+1 eps) [worker 0] episodes_seen=350 last_return=-186.2 (+1 eps) [worker 2] episodes_seen=350 last_return=-205.4 (+1 eps) [worker 1] episodes_seen=340 last_return=-202.6 (+1 eps) [worker 0] episodes_seen=360 last_return=-193.0 (+1 eps) [worker 2] episodes_seen=360 last_return=-171.8 (+1 eps) [worker 1] episodes_seen=350 last_return=-129.4 (+1 eps) [worker 1] episodes_seen=360 last_return=-146.1 (+1 eps) [worker 0] episodes_seen=370 last_return=-129.5 (+1 eps) [worker 2] episodes_seen=370 last_return=-106.8 (+1 eps) [worker 1] episodes_seen=370 last_return=-115.9 (+1 eps) [worker 2] episodes_seen=380 last_return=-161.0 (+1 eps) [worker 0] episodes_seen=380 last_return=-117.9 (+1 eps) [worker 1] episodes_seen=380 last_return=-214.7 (+1 eps) [worker 2] episodes_seen=390 last_return=-194.7 (+1 eps) [worker 0] episodes_seen=390 last_return=-225.0 (+1 eps) [worker 2] episodes_seen=400 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=390 last_return=-212.8 (+1 eps) [worker 0] episodes_seen=400 last_return=-129.5 (+1 eps) [worker 1] episodes_seen=400 last_return=-131.1 (+1 eps) [worker 2] episodes_seen=410 last_return=-151.9 (+1 eps) [worker 0] episodes_seen=410 last_return=-194.0 (+1 eps) [worker 2] episodes_seen=420 last_return=-107.7 (+1 eps) [worker 1] episodes_seen=410 last_return=-114.0 (+1 eps) [A2C][sync] it= 3337 steps= 150165 (+ 45) avg10=-140.15 loss=98794584.000 pg=-0.000 vf=179626512.000 H=0.000 gn=35698788.000 [worker 0] episodes_seen=420 last_return=-151.4 (+1 eps) [worker 1] episodes_seen=420 last_return=-186.7 (+1 eps) [worker 2] episodes_seen=430 last_return=-122.5 (+1 eps) [worker 0] episodes_seen=430 last_return=-185.5 (+1 eps) [worker 1] episodes_seen=430 last_return=-180.9 (+1 eps) [worker 2] episodes_seen=440 last_return=-208.9 (+1 eps) [worker 0] episodes_seen=440 last_return=-124.2 
(+1 eps) [worker 2] episodes_seen=450 last_return=-192.9 (+1 eps) [worker 1] episodes_seen=440 last_return=-150.6 (+1 eps) [worker 0] episodes_seen=450 last_return=-208.7 (+1 eps) [worker 2] episodes_seen=460 last_return=-146.4 (+1 eps) [worker 0] episodes_seen=460 last_return=-172.9 (+1 eps) [worker 1] episodes_seen=450 last_return=-157.9 (+1 eps) [worker 2] episodes_seen=470 last_return=-202.7 (+1 eps) [worker 0] episodes_seen=470 last_return=-148.2 (+1 eps) [worker 1] episodes_seen=460 last_return=-290.0 (+1 eps) [worker 2] episodes_seen=480 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=470 last_return=-174.3 (+1 eps) [worker 0] episodes_seen=480 last_return=-266.9 (+1 eps) [worker 2] episodes_seen=490 last_return=-156.8 (+1 eps) [worker 1] episodes_seen=480 last_return=-108.2 (+1 eps) [worker 0] episodes_seen=490 last_return=-222.6 (+1 eps) [worker 2] episodes_seen=500 last_return=-123.8 (+1 eps) [worker 1] episodes_seen=490 last_return=-138.1 (+1 eps) [worker 0] episodes_seen=500 last_return=-215.9 (+1 eps) [worker 2] episodes_seen=510 last_return=-159.1 (+1 eps) [worker 1] episodes_seen=500 last_return=-196.2 (+1 eps) [worker 0] episodes_seen=510 last_return=-107.4 (+1 eps) [worker 2] episodes_seen=520 last_return=-140.2 (+1 eps) [worker 1] episodes_seen=510 last_return=-229.0 (+1 eps) [worker 0] episodes_seen=520 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=530 last_return=-241.0 (+1 eps) [worker 1] episodes_seen=520 last_return=-189.5 (+1 eps) [worker 0] episodes_seen=530 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=540 last_return=-131.4 (+1 eps) [worker 0] episodes_seen=540 last_return=-149.3 (+1 eps) [worker 1] episodes_seen=530 last_return=-152.1 (+1 eps) [worker 2] episodes_seen=550 last_return=-193.6 (+1 eps) [worker 0] episodes_seen=550 last_return=-145.6 (+1 eps) [worker 1] episodes_seen=540 last_return=-177.7 (+1 eps) [worker 0] episodes_seen=560 last_return=-150.3 (+1 eps) [worker 2] episodes_seen=560 last_return=-148.6 (+1 
eps) [A2C][sync] it= 4449 steps= 200205 (+ 45) avg10=-157.87 loss=814429.812 pg=-0.000 vf=1480781.500 H=0.000 gn=3141382.750 [worker 1] episodes_seen=550 last_return=-173.9 (+1 eps) [worker 2] episodes_seen=570 last_return=-131.6 (+1 eps) [worker 0] episodes_seen=570 last_return=-150.8 (+1 eps) [worker 1] episodes_seen=560 last_return=-146.5 (+1 eps) [worker 2] episodes_seen=580 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=580 last_return=-145.4 (+1 eps) [worker 1] episodes_seen=570 last_return=-143.8 (+1 eps) [worker 0] episodes_seen=590 last_return=-165.0 (+1 eps) [worker 2] episodes_seen=590 last_return=-189.3 (+1 eps) [worker 1] episodes_seen=580 last_return=-223.9 (+1 eps) [worker 0] episodes_seen=600 last_return=-215.1 (+1 eps) [worker 2] episodes_seen=600 last_return=-158.6 (+1 eps) [worker 1] episodes_seen=590 last_return=-122.9 (+1 eps) [worker 0] episodes_seen=610 last_return=-165.7 (+1 eps) [worker 2] episodes_seen=610 last_return=-267.6 (+1 eps) [worker 1] episodes_seen=600 last_return=-182.4 (+1 eps) [worker 0] episodes_seen=620 last_return=-125.2 (+1 eps) [worker 2] episodes_seen=620 last_return=-121.0 (+1 eps) [worker 1] episodes_seen=610 last_return=-115.0 (+1 eps) [worker 0] episodes_seen=630 last_return=-215.3 (+1 eps) [worker 2] episodes_seen=630 last_return=-243.7 (+1 eps) [worker 1] episodes_seen=620 last_return=-192.0 (+1 eps) [worker 0] episodes_seen=640 last_return=-114.3 (+1 eps) [worker 2] episodes_seen=640 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=630 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=650 last_return=-203.7 (+1 eps) [worker 1] episodes_seen=640 last_return=-210.2 (+1 eps) [worker 2] episodes_seen=650 last_return=-176.3 (+1 eps) [worker 0] episodes_seen=660 last_return=-114.8 (+1 eps) [worker 1] episodes_seen=650 last_return=-139.4 (+1 eps) [worker 2] episodes_seen=660 last_return=-186.1 (+1 eps) [worker 0] episodes_seen=670 last_return=-189.7 (+1 eps) [worker 1] episodes_seen=660 last_return=-168.6 
(+1 eps) [worker 2] episodes_seen=670 last_return=-173.9 (+1 eps) [worker 0] episodes_seen=680 last_return=-177.1 (+1 eps) [worker 1] episodes_seen=670 last_return=-177.0 (+1 eps) [worker 2] episodes_seen=680 last_return=-139.3 (+1 eps) [worker 0] episodes_seen=690 last_return=-138.3 (+1 eps) [worker 1] episodes_seen=680 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=690 last_return=-131.3 (+1 eps) [worker 0] episodes_seen=700 last_return=-100.0 (+1 eps) [A2C][sync] it= 5561 steps= 250245 (+ 45) avg10=-144.50 loss=5012535.000 pg=-0.000 vf=9113700.000 H=0.000 gn=20270770.000 [worker 1] episodes_seen=690 last_return=-146.4 (+1 eps) [worker 0] episodes_seen=710 last_return=-109.8 (+1 eps) [worker 2] episodes_seen=700 last_return=-114.8 (+1 eps) [worker 1] episodes_seen=700 last_return=-128.6 (+1 eps) [worker 0] episodes_seen=720 last_return=-160.5 (+1 eps) [worker 2] episodes_seen=710 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=710 last_return=-126.7 (+1 eps) [worker 0] episodes_seen=730 last_return=-157.2 (+1 eps) [worker 2] episodes_seen=720 last_return=-143.8 (+1 eps) [worker 1] episodes_seen=720 last_return=-177.8 (+1 eps) [worker 2] episodes_seen=730 last_return=-204.5 (+1 eps) [worker 0] episodes_seen=740 last_return=-191.5 (+1 eps) [worker 1] episodes_seen=730 last_return=-188.3 (+1 eps) [worker 2] episodes_seen=740 last_return=-200.1 (+1 eps) [worker 0] episodes_seen=750 last_return=-219.4 (+1 eps) [worker 1] episodes_seen=740 last_return=-232.0 (+1 eps) [worker 2] episodes_seen=750 last_return=-158.1 (+1 eps) [worker 0] episodes_seen=760 last_return=-181.0 (+1 eps) [worker 1] episodes_seen=750 last_return=-115.3 (+1 eps) [worker 2] episodes_seen=760 last_return=-163.8 (+1 eps) [worker 0] episodes_seen=770 last_return=-228.0 (+1 eps) [worker 1] episodes_seen=760 last_return=-172.8 (+1 eps) [worker 2] episodes_seen=770 last_return=-203.3 (+1 eps) [worker 0] episodes_seen=780 last_return=-117.6 (+1 eps) [worker 1] episodes_seen=770 
last_return=-108.5 (+1 eps) [worker 2] episodes_seen=780 last_return=-152.0 (+1 eps) [worker 0] episodes_seen=790 last_return=-175.4 (+1 eps) [worker 1] episodes_seen=780 last_return=-152.0 (+1 eps) [worker 2] episodes_seen=790 last_return=-110.8 (+1 eps) [worker 0] episodes_seen=800 last_return=-203.6 (+1 eps) [worker 1] episodes_seen=790 last_return=-168.3 (+1 eps) [worker 0] episodes_seen=810 last_return=-178.9 (+1 eps) [worker 2] episodes_seen=800 last_return=-150.2 (+1 eps) [worker 0] episodes_seen=820 last_return=-201.0 (+1 eps) [worker 1] episodes_seen=800 last_return=-207.3 (+1 eps) [worker 2] episodes_seen=810 last_return=-173.9 (+1 eps) [worker 0] episodes_seen=830 last_return=-161.2 (+1 eps) [worker 1] episodes_seen=810 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=820 last_return=-160.3 (+1 eps) [A2C][sync] it= 6673 steps= 300285 (+ 45) avg10=-155.98 loss=13098783.000 pg=-0.000 vf=23815968.000 H=0.000 gn=47688380.000 [worker 1] episodes_seen=820 last_return=-217.4 (+1 eps) [worker 0] episodes_seen=840 last_return=-214.4 (+1 eps) [worker 2] episodes_seen=830 last_return=-182.9 (+1 eps) [worker 0] episodes_seen=850 last_return=-206.8 (+1 eps) [worker 1] episodes_seen=830 last_return=-193.6 (+1 eps) [worker 2] episodes_seen=840 last_return=-119.7 (+1 eps) [worker 0] episodes_seen=860 last_return=-151.3 (+1 eps) [worker 1] episodes_seen=840 last_return=-148.8 (+1 eps) [worker 2] episodes_seen=850 last_return=-232.0 (+1 eps) [worker 1] episodes_seen=850 last_return=-161.4 (+1 eps) [worker 0] episodes_seen=870 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=860 last_return=-114.5 (+1 eps) [worker 1] episodes_seen=860 last_return=-212.4 (+1 eps) [worker 0] episodes_seen=880 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=870 last_return=-193.2 (+1 eps) [worker 1] episodes_seen=870 last_return=-132.4 (+1 eps) [worker 0] episodes_seen=890 last_return=-105.3 (+1 eps) [worker 2] episodes_seen=880 last_return=-169.9 (+1 eps) [worker 1] 
episodes_seen=880 last_return=-158.1 (+1 eps) [worker 0] episodes_seen=900 last_return=-158.7 (+1 eps) [worker 2] episodes_seen=890 last_return=-154.9 (+1 eps) [worker 0] episodes_seen=910 last_return=-228.4 (+1 eps) [worker 1] episodes_seen=890 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=900 last_return=-107.0 (+1 eps) [worker 1] episodes_seen=900 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=920 last_return=-259.4 (+1 eps) [worker 2] episodes_seen=910 last_return=-148.8 (+1 eps) [worker 1] episodes_seen=910 last_return=-107.3 (+1 eps) [worker 0] episodes_seen=930 last_return=-137.4 (+1 eps) [worker 1] episodes_seen=920 last_return=-192.5 (+1 eps) [worker 2] episodes_seen=920 last_return=-129.4 (+1 eps) [worker 0] episodes_seen=940 last_return=-201.7 (+1 eps) [worker 1] episodes_seen=930 last_return=-213.0 (+1 eps) [worker 2] episodes_seen=930 last_return=-140.8 (+1 eps) [worker 0] episodes_seen=950 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=940 last_return=-187.3 (+1 eps) [worker 2] episodes_seen=940 last_return=-168.9 (+1 eps) [worker 0] episodes_seen=960 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=950 last_return=-185.0 (+1 eps) [worker 2] episodes_seen=950 last_return=-233.5 (+1 eps) [worker 0] episodes_seen=970 last_return=-116.3 (+1 eps) [worker 1] episodes_seen=960 last_return=-201.7 (+1 eps) [A2C][sync] it= 7785 steps= 350325 (+ 45) avg10=-174.16 loss=41879876.000 pg=-0.000 vf=76145224.000 H=0.000 gn=153574208.000 [worker 2] episodes_seen=960 last_return=-156.0 (+1 eps) [worker 0] episodes_seen=980 last_return=-184.1 (+1 eps) [worker 1] episodes_seen=970 last_return=-171.6 (+1 eps) [worker 2] episodes_seen=970 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=980 last_return=-106.5 (+1 eps) [worker 0] episodes_seen=990 last_return=-141.8 (+1 eps) [worker 2] episodes_seen=980 last_return=-182.3 (+1 eps) [worker 1] episodes_seen=990 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1000 last_return=-219.3 (+1 eps) 
[worker 2] episodes_seen=990 last_return=-157.0 (+1 eps) [worker 1] episodes_seen=1000 last_return=-125.5 (+1 eps) [worker 0] episodes_seen=1010 last_return=-154.9 (+1 eps) [worker 2] episodes_seen=1000 last_return=-168.9 (+1 eps) [worker 1] episodes_seen=1010 last_return=-218.2 (+1 eps) [worker 2] episodes_seen=1010 last_return=-164.2 (+1 eps) [worker 0] episodes_seen=1020 last_return=-195.5 (+1 eps) [worker 1] episodes_seen=1020 last_return=-136.0 (+1 eps) [worker 2] episodes_seen=1020 last_return=-222.3 (+1 eps) [worker 0] episodes_seen=1030 last_return=-203.7 (+1 eps) [worker 1] episodes_seen=1030 last_return=-217.5 (+1 eps) [worker 2] episodes_seen=1030 last_return=-140.2 (+1 eps) [worker 0] episodes_seen=1040 last_return=-153.2 (+1 eps) [worker 1] episodes_seen=1040 last_return=-188.3 (+1 eps) [worker 2] episodes_seen=1040 last_return=-110.4 (+1 eps) [worker 0] episodes_seen=1050 last_return=-271.1 (+1 eps) [worker 0] episodes_seen=1060 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1050 last_return=-160.5 (+1 eps) [worker 2] episodes_seen=1050 last_return=-122.4 (+1 eps) [worker 1] episodes_seen=1060 last_return=-129.2 (+1 eps) [worker 0] episodes_seen=1070 last_return=-185.6 (+1 eps) [worker 2] episodes_seen=1060 last_return=-223.7 (+1 eps) [worker 1] episodes_seen=1070 last_return=-130.1 (+1 eps) [worker 0] episodes_seen=1080 last_return=-171.3 (+1 eps) [worker 2] episodes_seen=1070 last_return=-154.8 (+1 eps) [worker 1] episodes_seen=1080 last_return=-124.9 (+1 eps) [worker 0] episodes_seen=1090 last_return=-191.7 (+1 eps) [worker 2] episodes_seen=1080 last_return=-140.1 (+1 eps) [worker 1] episodes_seen=1090 last_return=-197.3 (+1 eps) [worker 0] episodes_seen=1100 last_return=-174.5 (+1 eps) [worker 2] episodes_seen=1090 last_return=-151.3 (+1 eps) [worker 1] episodes_seen=1100 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1110 last_return=-138.7 (+1 eps) [A2C][sync] it= 8897 steps= 400365 (+ 45) avg10=-179.01 loss=62324040.000 pg=-0.000 
vf=113316432.000 H=0.000 gn=178043328.000 [worker 2] episodes_seen=1100 last_return=-143.5 (+1 eps) [worker 0] episodes_seen=1120 last_return=-205.1 (+1 eps) [worker 1] episodes_seen=1110 last_return=-115.4 (+1 eps) [worker 2] episodes_seen=1110 last_return=-189.6 (+1 eps) [worker 0] episodes_seen=1130 last_return=-143.3 (+1 eps) [worker 2] episodes_seen=1120 last_return=-114.5 (+1 eps) [worker 1] episodes_seen=1120 last_return=-119.2 (+1 eps) [worker 0] episodes_seen=1140 last_return=-228.8 (+1 eps) [worker 2] episodes_seen=1130 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1130 last_return=-186.1 (+1 eps) [worker 0] episodes_seen=1150 last_return=-187.2 (+1 eps) [worker 1] episodes_seen=1140 last_return=-168.7 (+1 eps) [worker 2] episodes_seen=1140 last_return=-123.3 (+1 eps) [worker 0] episodes_seen=1160 last_return=-138.6 (+1 eps) [worker 1] episodes_seen=1150 last_return=-192.7 (+1 eps) [worker 2] episodes_seen=1150 last_return=-114.4 (+1 eps) [worker 0] episodes_seen=1170 last_return=-143.2 (+1 eps) [worker 1] episodes_seen=1160 last_return=-165.4 (+1 eps) [worker 2] episodes_seen=1160 last_return=-136.7 (+1 eps) [worker 0] episodes_seen=1180 last_return=-175.4 (+1 eps) [worker 1] episodes_seen=1170 last_return=-190.5 (+1 eps) [worker 2] episodes_seen=1170 last_return=-123.4 (+1 eps) [worker 0] episodes_seen=1190 last_return=-165.3 (+1 eps) [worker 1] episodes_seen=1180 last_return=-210.8 (+1 eps) [worker 2] episodes_seen=1180 last_return=-161.5 (+1 eps) [worker 0] episodes_seen=1200 last_return=-106.6 (+1 eps) [worker 1] episodes_seen=1190 last_return=-183.1 (+1 eps) [worker 2] episodes_seen=1190 last_return=-207.3 (+1 eps) [worker 0] episodes_seen=1210 last_return=-140.5 (+1 eps) [worker 1] episodes_seen=1200 last_return=-140.7 (+1 eps) [worker 2] episodes_seen=1200 last_return=-175.8 (+1 eps) [worker 0] episodes_seen=1220 last_return=-203.4 (+1 eps) [worker 1] episodes_seen=1210 last_return=-144.2 (+1 eps) [worker 0] episodes_seen=1230 
last_return=-170.2 (+1 eps) [worker 2] episodes_seen=1210 last_return=-170.2 (+1 eps) [worker 1] episodes_seen=1220 last_return=-137.0 (+1 eps) [worker 2] episodes_seen=1220 last_return=-131.2 (+1 eps) [worker 0] episodes_seen=1240 last_return=-194.1 (+1 eps) [worker 1] episodes_seen=1230 last_return=-166.2 (+1 eps) [worker 2] episodes_seen=1230 last_return=-157.9 (+1 eps) [worker 0] episodes_seen=1250 last_return=-174.2 (+1 eps) [A2C][sync] it=10009 steps= 450405 (+ 45) avg10=-169.37 loss=120381176.000 pg=-0.000 vf=218874864.000 H=0.000 gn=399004704.000 [worker 2] episodes_seen=1240 last_return=-185.7 (+1 eps) [worker 1] episodes_seen=1240 last_return=-256.6 (+1 eps) [worker 0] episodes_seen=1260 last_return=-145.6 (+1 eps) [worker 1] episodes_seen=1250 last_return=-150.5 (+1 eps) [worker 2] episodes_seen=1250 last_return=-108.3 (+1 eps) [worker 0] episodes_seen=1270 last_return=-156.7 (+1 eps) [worker 1] episodes_seen=1260 last_return=-162.2 (+1 eps) [worker 0] episodes_seen=1280 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1260 last_return=-248.8 (+1 eps) [worker 2] episodes_seen=1270 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1270 last_return=-150.7 (+1 eps) [worker 0] episodes_seen=1290 last_return=-197.3 (+1 eps) [worker 2] episodes_seen=1280 last_return=-148.3 (+1 eps) [worker 1] episodes_seen=1280 last_return=-229.3 (+1 eps) [worker 0] episodes_seen=1300 last_return=-235.3 (+1 eps) [worker 2] episodes_seen=1290 last_return=-117.6 (+1 eps) [worker 1] episodes_seen=1290 last_return=-177.4 (+1 eps) [worker 0] episodes_seen=1310 last_return=-150.1 (+1 eps) [worker 2] episodes_seen=1300 last_return=-183.4 (+1 eps) [worker 1] episodes_seen=1300 last_return=-182.6 (+1 eps) [worker 0] episodes_seen=1320 last_return=-108.3 (+1 eps) [worker 2] episodes_seen=1310 last_return=-171.1 (+1 eps) [worker 1] episodes_seen=1310 last_return=-233.7 (+1 eps) [worker 0] episodes_seen=1330 last_return=-108.2 (+1 eps) [worker 0] episodes_seen=1340 
last_return=-209.7 (+1 eps) [worker 1] episodes_seen=1320 last_return=-213.5 (+1 eps) [worker 2] episodes_seen=1320 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1350 last_return=-108.6 (+1 eps) [worker 2] episodes_seen=1330 last_return=-186.1 (+1 eps) [worker 1] episodes_seen=1330 last_return=-236.8 (+1 eps) [worker 0] episodes_seen=1360 last_return=-181.2 (+1 eps) [worker 2] episodes_seen=1340 last_return=-209.6 (+1 eps) [worker 1] episodes_seen=1340 last_return=-184.2 (+1 eps) [worker 0] episodes_seen=1370 last_return=-199.2 (+1 eps) [worker 2] episodes_seen=1350 last_return=-203.6 (+1 eps) [worker 1] episodes_seen=1350 last_return=-158.2 (+1 eps) [worker 0] episodes_seen=1380 last_return=-247.9 (+1 eps) [worker 2] episodes_seen=1360 last_return=-176.9 (+1 eps) [worker 1] episodes_seen=1360 last_return=-133.2 (+1 eps) [worker 0] episodes_seen=1390 last_return=-119.1 (+1 eps) [A2C][sync] it=11121 steps= 500445 (+ 45) avg10=-165.53 loss=10537841664.000 pg=-0.000 vf=19159711744.000 H=0.000 gn=1129434112.000 [worker 2] episodes_seen=1370 last_return=-173.5 (+1 eps) [worker 1] episodes_seen=1370 last_return=-225.6 (+1 eps) [worker 0] episodes_seen=1400 last_return=-177.7 (+1 eps) [worker 2] episodes_seen=1380 last_return=-246.6 (+1 eps) [worker 1] episodes_seen=1380 last_return=-148.8 (+1 eps) [worker 0] episodes_seen=1410 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1390 last_return=-167.0 (+1 eps) [worker 0] episodes_seen=1420 last_return=-223.4 (+1 eps) [worker 1] episodes_seen=1390 last_return=-112.8 (+1 eps) [worker 0] episodes_seen=1430 last_return=-223.4 (+1 eps) [worker 1] episodes_seen=1400 last_return=-184.0 (+1 eps) [worker 2] episodes_seen=1400 last_return=-147.7 (+1 eps) [worker 2] episodes_seen=1410 last_return=-109.5 (+1 eps) [worker 0] episodes_seen=1440 last_return=-192.4 (+1 eps) [worker 1] episodes_seen=1410 last_return=-290.6 (+1 eps) [worker 0] episodes_seen=1450 last_return=-204.8 (+1 eps) [worker 1] episodes_seen=1420 
last_return=-165.3 (+1 eps) [worker 2] episodes_seen=1420 last_return=-179.3 (+1 eps) [worker 0] episodes_seen=1460 last_return=-168.7 (+1 eps) [worker 1] episodes_seen=1430 last_return=-236.2 (+1 eps) [worker 2] episodes_seen=1430 last_return=-147.0 (+1 eps) [worker 0] episodes_seen=1470 last_return=-135.9 (+1 eps) [worker 1] episodes_seen=1440 last_return=-141.5 (+1 eps) [worker 2] episodes_seen=1440 last_return=-242.8 (+1 eps) [worker 0] episodes_seen=1480 last_return=-136.8 (+1 eps) [worker 1] episodes_seen=1450 last_return=-207.4 (+1 eps) [worker 2] episodes_seen=1450 last_return=-163.9 (+1 eps) [worker 0] episodes_seen=1490 last_return=-167.6 (+1 eps) [worker 1] episodes_seen=1460 last_return=-132.5 (+1 eps) [worker 2] episodes_seen=1460 last_return=-172.2 (+1 eps) [worker 0] episodes_seen=1500 last_return=-134.8 (+1 eps) [worker 2] episodes_seen=1470 last_return=-133.7 (+1 eps) [worker 1] episodes_seen=1470 last_return=-182.6 (+1 eps) [worker 0] episodes_seen=1510 last_return=-158.0 (+1 eps) [worker 2] episodes_seen=1480 last_return=-169.2 (+1 eps) [worker 1] episodes_seen=1480 last_return=-245.0 (+1 eps) [worker 0] episodes_seen=1520 last_return=-167.2 (+1 eps) [worker 2] episodes_seen=1490 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1490 last_return=-216.2 (+1 eps) [worker 0] episodes_seen=1530 last_return=-140.8 (+1 eps) [worker 2] episodes_seen=1500 last_return=-150.3 (+1 eps) [worker 1] episodes_seen=1500 last_return=-213.6 (+1 eps) [A2C][sync] it=12233 steps= 550485 (+ 45) avg10=-165.87 loss=117170495488.000 pg=-0.000 vf=213037252608.000 H=0.000 gn=15651976192.000 [worker 0] episodes_seen=1540 last_return=-221.3 (+1 eps) [worker 2] episodes_seen=1510 last_return=-164.3 (+1 eps) [worker 1] episodes_seen=1510 last_return=-169.2 (+1 eps) [worker 0] episodes_seen=1550 last_return=-197.2 (+1 eps) [worker 2] episodes_seen=1520 last_return=-159.5 (+1 eps) [worker 0] episodes_seen=1560 last_return=-165.2 (+1 eps) [worker 1] episodes_seen=1520 
last_return=-106.8 (+1 eps) [worker 2] episodes_seen=1530 last_return=-156.4 (+1 eps) [worker 0] episodes_seen=1570 last_return=-169.2 (+1 eps) [worker 2] episodes_seen=1540 last_return=-182.9 (+1 eps) [worker 1] episodes_seen=1530 last_return=-149.4 (+1 eps) [worker 0] episodes_seen=1580 last_return=-219.3 (+1 eps) [worker 2] episodes_seen=1550 last_return=-242.6 (+1 eps) [worker 1] episodes_seen=1540 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1590 last_return=-140.0 (+1 eps) [worker 0] episodes_seen=1600 last_return=-145.7 (+1 eps) [worker 2] episodes_seen=1560 last_return=-110.2 (+1 eps) [worker 1] episodes_seen=1550 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1610 last_return=-134.1 (+1 eps) [worker 2] episodes_seen=1570 last_return=-169.1 (+1 eps) [worker 1] episodes_seen=1560 last_return=-151.8 (+1 eps) [worker 0] episodes_seen=1620 last_return=-155.2 (+1 eps) [worker 1] episodes_seen=1570 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1580 last_return=-156.0 (+1 eps) [worker 0] episodes_seen=1630 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1580 last_return=-192.3 (+1 eps) [worker 2] episodes_seen=1590 last_return=-191.6 (+1 eps) [worker 0] episodes_seen=1640 last_return=-214.3 (+1 eps) [worker 1] episodes_seen=1590 last_return=-179.9 (+1 eps) [worker 2] episodes_seen=1600 last_return=-192.3 (+1 eps) [worker 0] episodes_seen=1650 last_return=-109.7 (+1 eps) [worker 1] episodes_seen=1600 last_return=-184.2 (+1 eps) [worker 2] episodes_seen=1610 last_return=-143.1 (+1 eps) [worker 0] episodes_seen=1660 last_return=-182.6 (+1 eps) [worker 1] episodes_seen=1610 last_return=-183.5 (+1 eps) [worker 2] episodes_seen=1620 last_return=-159.3 (+1 eps) [worker 0] episodes_seen=1670 last_return=-213.8 (+1 eps) [worker 1] episodes_seen=1620 last_return=-135.5 (+1 eps) [worker 2] episodes_seen=1630 last_return=-215.8 (+1 eps) [worker 1] episodes_seen=1630 last_return=-165.9 (+1 eps) [worker 0] episodes_seen=1680 last_return=-228.1 (+1 eps) 
[A2C][sync] it=13345 steps= 600525 (+ 45) avg10=-152.56 loss=28404238336.000 pg=-0.000 vf=51644067840.000 H=0.000 gn=3265239040.000 [worker 2] episodes_seen=1640 last_return=-142.8 (+1 eps) [worker 1] episodes_seen=1640 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1690 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1650 last_return=-238.9 (+1 eps) [worker 1] episodes_seen=1650 last_return=-165.7 (+1 eps) [worker 0] episodes_seen=1700 last_return=-212.8 (+1 eps) [worker 2] episodes_seen=1660 last_return=-191.6 (+1 eps) [worker 1] episodes_seen=1660 last_return=-123.5 (+1 eps) [worker 0] episodes_seen=1710 last_return=-136.6 (+1 eps) [worker 2] episodes_seen=1670 last_return=-118.3 (+1 eps) [worker 1] episodes_seen=1670 last_return=-175.4 (+1 eps) [worker 0] episodes_seen=1720 last_return=-177.3 (+1 eps) [worker 2] episodes_seen=1680 last_return=-155.6 (+1 eps) [worker 1] episodes_seen=1680 last_return=-188.2 (+1 eps) [worker 0] episodes_seen=1730 last_return=-152.5 (+1 eps) [worker 2] episodes_seen=1690 last_return=-142.8 (+1 eps) [worker 1] episodes_seen=1690 last_return=-216.3 (+1 eps) [worker 0] episodes_seen=1740 last_return=-144.7 (+1 eps) [worker 2] episodes_seen=1700 last_return=-136.2 (+1 eps) [worker 1] episodes_seen=1700 last_return=-191.9 (+1 eps) [worker 2] episodes_seen=1710 last_return=-176.7 (+1 eps) [worker 0] episodes_seen=1750 last_return=-261.3 (+1 eps) [worker 1] episodes_seen=1710 last_return=-147.7 (+1 eps) [worker 2] episodes_seen=1720 last_return=-218.9 (+1 eps) [worker 0] episodes_seen=1760 last_return=-215.9 (+1 eps) [worker 1] episodes_seen=1720 last_return=-161.6 (+1 eps) [worker 2] episodes_seen=1730 last_return=-206.2 (+1 eps) [worker 0] episodes_seen=1770 last_return=-109.0 (+1 eps) [worker 1] episodes_seen=1730 last_return=-171.4 (+1 eps) [worker 2] episodes_seen=1740 last_return=-185.0 (+1 eps) [worker 0] episodes_seen=1780 last_return=-142.3 (+1 eps) [worker 2] episodes_seen=1750 last_return=-202.9 (+1 eps) [worker 0] 
episodes_seen=1790 last_return=-166.5 (+1 eps) [worker 1] episodes_seen=1740 last_return=-165.9 (+1 eps) [worker 2] episodes_seen=1760 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1800 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1750 last_return=-181.6 (+1 eps) [worker 2] episodes_seen=1770 last_return=-142.3 (+1 eps) [worker 0] episodes_seen=1810 last_return=-174.0 (+1 eps) [worker 1] episodes_seen=1760 last_return=-189.7 (+1 eps) [worker 2] episodes_seen=1780 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1820 last_return=-148.9 (+1 eps) [A2C][sync] it=14457 steps= 650565 (+ 45) avg10=-152.99 loss=54198693888.000 pg=-0.000 vf=98543075328.000 H=0.000 gn=4792479232.000 [worker 1] episodes_seen=1770 last_return=-134.7 (+1 eps) [worker 2] episodes_seen=1790 last_return=-107.6 (+1 eps) [worker 0] episodes_seen=1830 last_return=-188.1 (+1 eps) [worker 1] episodes_seen=1780 last_return=-143.1 (+1 eps) [worker 2] episodes_seen=1800 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1840 last_return=-109.7 (+1 eps) [worker 1] episodes_seen=1790 last_return=-174.1 (+1 eps) [worker 2] episodes_seen=1810 last_return=-166.0 (+1 eps) [worker 0] episodes_seen=1850 last_return=-138.6 (+1 eps) [worker 1] episodes_seen=1800 last_return=-176.1 (+1 eps) [worker 2] episodes_seen=1820 last_return=-163.8 (+1 eps) [worker 0] episodes_seen=1860 last_return=-109.2 (+1 eps) [worker 1] episodes_seen=1810 last_return=-162.0 (+1 eps) [worker 2] episodes_seen=1830 last_return=-150.4 (+1 eps) [worker 0] episodes_seen=1870 last_return=-140.6 (+1 eps) [worker 1] episodes_seen=1820 last_return=-179.1 (+1 eps) [worker 2] episodes_seen=1840 last_return=-144.8 (+1 eps) [worker 0] episodes_seen=1880 last_return=-159.7 (+1 eps) [worker 1] episodes_seen=1830 last_return=-170.9 (+1 eps) [worker 2] episodes_seen=1850 last_return=-162.0 (+1 eps) [worker 0] episodes_seen=1890 last_return=-258.4 (+1 eps) [worker 1] episodes_seen=1840 last_return=-223.0 (+1 eps) [worker 2] 
episodes_seen=1860 last_return=-235.3 (+1 eps) [worker 0] episodes_seen=1900 last_return=-161.2 (+1 eps) [worker 1] episodes_seen=1850 last_return=-195.0 (+1 eps) [worker 2] episodes_seen=1870 last_return=-207.9 (+1 eps) [worker 0] episodes_seen=1910 last_return=-137.9 (+1 eps) [worker 1] episodes_seen=1860 last_return=-131.7 (+1 eps) [worker 2] episodes_seen=1880 last_return=-219.8 (+1 eps) [worker 0] episodes_seen=1920 last_return=-176.7 (+1 eps) [worker 1] episodes_seen=1870 last_return=-241.8 (+1 eps) [worker 2] episodes_seen=1890 last_return=-185.8 (+1 eps) [worker 0] episodes_seen=1930 last_return=-208.6 (+1 eps) [worker 1] episodes_seen=1880 last_return=-183.6 (+1 eps) [worker 0] episodes_seen=1940 last_return=-212.3 (+1 eps) [worker 2] episodes_seen=1900 last_return=-175.0 (+1 eps) [worker 1] episodes_seen=1890 last_return=-172.6 (+1 eps) [worker 0] episodes_seen=1950 last_return=-192.8 (+1 eps) [worker 2] episodes_seen=1910 last_return=-213.3 (+1 eps) [worker 1] episodes_seen=1900 last_return=-204.1 (+1 eps) [worker 0] episodes_seen=1960 last_return=-207.0 (+1 eps) [worker 2] episodes_seen=1920 last_return=-119.5 (+1 eps) [worker 1] episodes_seen=1910 last_return=-204.7 (+1 eps) [A2C][sync] it=15569 steps= 700605 (+ 45) avg10=-189.25 loss=82667069440.000 pg=-0.000 vf=150303752192.000 H=0.000 gn=8562867712.000 [worker 0] episodes_seen=1970 last_return=-127.3 (+1 eps) [worker 2] episodes_seen=1930 last_return=-203.8 (+1 eps) [worker 1] episodes_seen=1920 last_return=-250.9 (+1 eps) [worker 0] episodes_seen=1980 last_return=-130.5 (+1 eps) [worker 2] episodes_seen=1940 last_return=-122.7 (+1 eps) [worker 1] episodes_seen=1930 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1990 last_return=-165.5 (+1 eps) [worker 2] episodes_seen=1950 last_return=-136.7 (+1 eps) [worker 1] episodes_seen=1940 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1960 last_return=-121.3 (+1 eps) [worker 0] episodes_seen=2000 last_return=-162.8 (+1 eps) [worker 1] 
episodes_seen=1950 last_return=-107.1 (+1 eps) [worker 2] episodes_seen=1970 last_return=-164.3 (+1 eps) [worker 0] episodes_seen=2010 last_return=-124.4 (+1 eps) [worker 1] episodes_seen=1960 last_return=-199.8 (+1 eps) [worker 0] episodes_seen=2020 last_return=-152.0 (+1 eps) [worker 2] episodes_seen=1980 last_return=-167.5 (+1 eps) [worker 1] episodes_seen=1970 last_return=-156.1 (+1 eps) [worker 0] episodes_seen=2030 last_return=-115.8 (+1 eps) [worker 2] episodes_seen=1990 last_return=-163.3 (+1 eps) [worker 1] episodes_seen=1980 last_return=-141.4 (+1 eps) [worker 0] episodes_seen=2040 last_return=-140.3 (+1 eps) [worker 2] episodes_seen=2000 last_return=-162.1 (+1 eps) [worker 1] episodes_seen=1990 last_return=-120.8 (+1 eps) [worker 0] episodes_seen=2050 last_return=-121.0 (+1 eps) [worker 2] episodes_seen=2010 last_return=-271.7 (+1 eps) [worker 1] episodes_seen=2000 last_return=-153.3 (+1 eps) [worker 0] episodes_seen=2060 last_return=-130.9 (+1 eps) [worker 2] episodes_seen=2020 last_return=-247.2 (+1 eps) [worker 1] episodes_seen=2010 last_return=-265.4 (+1 eps) [worker 0] episodes_seen=2070 last_return=-186.3 (+1 eps) [worker 2] episodes_seen=2030 last_return=-185.6 (+1 eps) [worker 1] episodes_seen=2020 last_return=-221.7 (+1 eps) [worker 0] episodes_seen=2080 last_return=-180.3 (+1 eps) [worker 2] episodes_seen=2040 last_return=-293.2 (+1 eps) [worker 1] episodes_seen=2030 last_return=-133.8 (+1 eps) [worker 0] episodes_seen=2090 last_return=-206.4 (+1 eps) [worker 2] episodes_seen=2050 last_return=-156.1 (+1 eps) [worker 1] episodes_seen=2040 last_return=-111.7 (+1 eps) [A2C][sync] it=16681 steps= 750645 (+ 45) avg10=-143.64 loss=36545347584.000 pg=-0.000 vf=66446082048.000 H=0.000 gn=2281964544.000 [worker 0] episodes_seen=2100 last_return=-149.9 (+1 eps) [worker 2] episodes_seen=2060 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2050 last_return=-168.7 (+1 eps) [worker 0] episodes_seen=2110 last_return=-206.4 (+1 eps) [worker 1] 
episodes_seen=2060 last_return=-279.4 (+1 eps) [worker 0] episodes_seen=2120 last_return=-230.1 (+1 eps) [worker 2] episodes_seen=2070 last_return=-128.0 (+1 eps) [worker 1] episodes_seen=2070 last_return=-132.5 (+1 eps) [worker 2] episodes_seen=2080 last_return=-192.7 (+1 eps) [worker 0] episodes_seen=2130 last_return=-132.7 (+1 eps) [worker 0] episodes_seen=2140 last_return=-182.0 (+1 eps) [worker 2] episodes_seen=2090 last_return=-208.5 (+1 eps) [worker 1] episodes_seen=2080 last_return=-114.6 (+1 eps) [worker 2] episodes_seen=2100 last_return=-137.4 (+1 eps) [worker 0] episodes_seen=2150 last_return=-135.7 (+1 eps) [worker 1] episodes_seen=2090 last_return=-225.0 (+1 eps) [worker 2] episodes_seen=2110 last_return=-185.4 (+1 eps) [worker 0] episodes_seen=2160 last_return=-140.3 (+1 eps) [worker 1] episodes_seen=2100 last_return=-196.3 (+1 eps) [worker 2] episodes_seen=2120 last_return=-133.0 (+1 eps) [worker 0] episodes_seen=2170 last_return=-182.3 (+1 eps) [worker 1] episodes_seen=2110 last_return=-134.9 (+1 eps) [worker 2] episodes_seen=2130 last_return=-156.6 (+1 eps) [worker 0] episodes_seen=2180 last_return=-163.9 (+1 eps) [worker 1] episodes_seen=2120 last_return=-121.1 (+1 eps) [worker 2] episodes_seen=2140 last_return=-113.3 (+1 eps) [worker 0] episodes_seen=2190 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2130 last_return=-158.6 (+1 eps) [worker 2] episodes_seen=2150 last_return=-176.6 (+1 eps) [worker 0] episodes_seen=2200 last_return=-216.9 (+1 eps) [worker 2] episodes_seen=2160 last_return=-190.5 (+1 eps) [worker 1] episodes_seen=2140 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2210 last_return=-218.9 (+1 eps) [worker 2] episodes_seen=2170 last_return=-144.8 (+1 eps) [worker 1] episodes_seen=2150 last_return=-230.1 (+1 eps) [worker 0] episodes_seen=2220 last_return=-150.3 (+1 eps) [worker 2] episodes_seen=2180 last_return=-141.1 (+1 eps) [worker 1] episodes_seen=2160 last_return=-112.4 (+1 eps) [worker 0] episodes_seen=2230 
last_return=-133.2 (+1 eps) [worker 2] episodes_seen=2190 last_return=-154.3 (+1 eps) [worker 1] episodes_seen=2170 last_return=-198.5 (+1 eps) [A2C][sync] it=17793 steps= 800685 (+ 45) avg10=-171.05 loss=56038281216.000 pg=-0.000 vf=101887778816.000 H=0.000 gn=3469776128.000 [worker 0] episodes_seen=2240 last_return=-152.5 (+1 eps) [worker 2] episodes_seen=2200 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2180 last_return=-110.3 (+1 eps) [worker 0] episodes_seen=2250 last_return=-110.0 (+1 eps) [worker 2] episodes_seen=2210 last_return=-219.5 (+1 eps) [worker 1] episodes_seen=2190 last_return=-174.0 (+1 eps) [worker 0] episodes_seen=2260 last_return=-179.0 (+1 eps) [worker 2] episodes_seen=2220 last_return=-127.5 (+1 eps) [worker 1] episodes_seen=2200 last_return=-193.9 (+1 eps) [worker 0] episodes_seen=2270 last_return=-117.7 (+1 eps) [worker 2] episodes_seen=2230 last_return=-135.9 (+1 eps) [worker 1] episodes_seen=2210 last_return=-176.1 (+1 eps) [worker 0] episodes_seen=2280 last_return=-155.6 (+1 eps) [worker 2] episodes_seen=2240 last_return=-152.5 (+1 eps) [worker 1] episodes_seen=2220 last_return=-118.2 (+1 eps) [worker 0] episodes_seen=2290 last_return=-150.6 (+1 eps) [worker 2] episodes_seen=2250 last_return=-181.5 (+1 eps) [worker 1] episodes_seen=2230 last_return=-187.6 (+1 eps) [worker 0] episodes_seen=2300 last_return=-139.4 (+1 eps) [worker 2] episodes_seen=2260 last_return=-193.8 (+1 eps) [worker 1] episodes_seen=2240 last_return=-109.1 (+1 eps) [worker 0] episodes_seen=2310 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=2270 last_return=-178.9 (+1 eps) [worker 0] episodes_seen=2320 last_return=-117.8 (+1 eps) [worker 1] episodes_seen=2250 last_return=-147.4 (+1 eps) [worker 2] episodes_seen=2280 last_return=-224.4 (+1 eps) [worker 1] episodes_seen=2260 last_return=-248.9 (+1 eps) [worker 0] episodes_seen=2330 last_return=-205.3 (+1 eps) [worker 2] episodes_seen=2290 last_return=-240.7 (+1 eps) [worker 0] episodes_seen=2340 
last_return=-130.7 (+1 eps) [worker 1] episodes_seen=2270 last_return=-178.7 (+1 eps) [worker 1] episodes_seen=2280 last_return=-116.9 (+1 eps) [worker 2] episodes_seen=2300 last_return=-141.4 (+1 eps) [worker 0] episodes_seen=2350 last_return=-152.1 (+1 eps) [worker 1] episodes_seen=2290 last_return=-114.5 (+1 eps) [worker 2] episodes_seen=2310 last_return=-219.5 (+1 eps) [worker 0] episodes_seen=2360 last_return=-221.7 (+1 eps) [worker 1] episodes_seen=2300 last_return=-166.1 (+1 eps) [worker 2] episodes_seen=2320 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2370 last_return=-119.2 (+1 eps) [A2C][sync] it=18905 steps= 850725 (+ 45) avg10=-138.70 loss=3131525632.000 pg=-0.000 vf=5693682688.000 H=0.000 gn=3576354816.000 [worker 1] episodes_seen=2310 last_return=-189.5 (+1 eps) [worker 2] episodes_seen=2330 last_return=-237.5 (+1 eps) [worker 0] episodes_seen=2380 last_return=-115.9 (+1 eps) [worker 1] episodes_seen=2320 last_return=-199.4 (+1 eps) [worker 2] episodes_seen=2340 last_return=-200.0 (+1 eps) [worker 0] episodes_seen=2390 last_return=-107.4 (+1 eps) [worker 2] episodes_seen=2350 last_return=-162.9 (+1 eps) [worker 0] episodes_seen=2400 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2330 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=2360 last_return=-124.5 (+1 eps) [worker 1] episodes_seen=2340 last_return=-135.2 (+1 eps) [worker 0] episodes_seen=2410 last_return=-144.2 (+1 eps) [worker 2] episodes_seen=2370 last_return=-164.2 (+1 eps) [worker 1] episodes_seen=2350 last_return=-163.1 (+1 eps) [worker 0] episodes_seen=2420 last_return=-157.5 (+1 eps) [worker 2] episodes_seen=2380 last_return=-209.6 (+1 eps) [worker 1] episodes_seen=2360 last_return=-191.8 (+1 eps) [worker 0] episodes_seen=2430 last_return=-153.8 (+1 eps) [worker 2] episodes_seen=2390 last_return=-183.2 (+1 eps) [worker 1] episodes_seen=2370 last_return=-197.5 (+1 eps) [worker 0] episodes_seen=2440 last_return=-237.7 (+1 eps) [worker 2] episodes_seen=2400 
last_return=-220.7 (+1 eps) [worker 1] episodes_seen=2380 last_return=-128.4 (+1 eps) [worker 0] episodes_seen=2450 last_return=-154.3 (+1 eps) [worker 2] episodes_seen=2410 last_return=-191.1 (+1 eps) [worker 1] episodes_seen=2390 last_return=-183.4 (+1 eps) [worker 0] episodes_seen=2460 last_return=-158.4 (+1 eps) [worker 2] episodes_seen=2420 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2400 last_return=-233.6 (+1 eps) [worker 0] episodes_seen=2470 last_return=-186.3 (+1 eps) [worker 2] episodes_seen=2430 last_return=-128.7 (+1 eps) [worker 0] episodes_seen=2480 last_return=-161.0 (+1 eps) [worker 1] episodes_seen=2410 last_return=-214.0 (+1 eps) [worker 2] episodes_seen=2440 last_return=-118.3 (+1 eps) [worker 0] episodes_seen=2490 last_return=-289.1 (+1 eps) [worker 1] episodes_seen=2420 last_return=-244.3 (+1 eps) [worker 2] episodes_seen=2450 last_return=-158.4 (+1 eps) [worker 2] episodes_seen=2460 last_return=-203.9 (+1 eps) [A2C][sync] it=20017 steps= 900765 (+ 45) avg10=-190.07 loss=5090254848.000 pg=-0.000 vf=9255008256.000 H=0.000 gn=7375058944.000 [worker 0] episodes_seen=2500 last_return=-201.3 (+1 eps) [worker 1] episodes_seen=2430 last_return=-131.5 (+1 eps) [worker 1] episodes_seen=2440 last_return=-148.8 (+1 eps) [worker 2] episodes_seen=2470 last_return=-235.6 (+1 eps) [worker 0] episodes_seen=2510 last_return=-170.0 (+1 eps) [worker 1] episodes_seen=2450 last_return=-149.9 (+1 eps) [worker 2] episodes_seen=2480 last_return=-121.5 (+1 eps) [worker 0] episodes_seen=2520 last_return=-153.5 (+1 eps) [worker 1] episodes_seen=2460 last_return=-120.9 (+1 eps) [worker 2] episodes_seen=2490 last_return=-157.4 (+1 eps) [worker 0] episodes_seen=2530 last_return=-211.9 (+1 eps) [worker 1] episodes_seen=2470 last_return=-156.9 (+1 eps) [worker 2] episodes_seen=2500 last_return=-214.9 (+1 eps) [worker 1] episodes_seen=2480 last_return=-204.3 (+1 eps) [worker 0] episodes_seen=2540 last_return=-130.8 (+1 eps) [worker 2] episodes_seen=2510 
last_return=-174.2 (+1 eps) [worker 0] episodes_seen=2550 last_return=-212.9 (+1 eps) [worker 1] episodes_seen=2490 last_return=-108.9 (+1 eps) [worker 2] episodes_seen=2520 last_return=-166.1 (+1 eps) [worker 0] episodes_seen=2560 last_return=-131.0 (+1 eps) [worker 1] episodes_seen=2500 last_return=-180.7 (+1 eps) [worker 2] episodes_seen=2530 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2570 last_return=-185.5 (+1 eps) [worker 1] episodes_seen=2510 last_return=-171.5 (+1 eps) [worker 2] episodes_seen=2540 last_return=-210.8 (+1 eps) [worker 1] episodes_seen=2520 last_return=-118.9 (+1 eps) [worker 0] episodes_seen=2580 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=2550 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2530 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2590 last_return=-237.6 (+1 eps) [worker 2] episodes_seen=2560 last_return=-117.7 (+1 eps) [worker 0] episodes_seen=2600 last_return=-138.6 (+1 eps) [worker 1] episodes_seen=2540 last_return=-146.0 (+1 eps) [worker 2] episodes_seen=2570 last_return=-125.9 (+1 eps) [worker 0] episodes_seen=2610 last_return=-121.0 (+1 eps) [worker 1] episodes_seen=2550 last_return=-208.0 (+1 eps) [worker 2] episodes_seen=2580 last_return=-185.0 (+1 eps) [worker 1] episodes_seen=2560 last_return=-218.5 (+1 eps) [worker 0] episodes_seen=2620 last_return=-201.4 (+1 eps) [worker 1] episodes_seen=2570 last_return=-202.7 (+1 eps) [worker 2] episodes_seen=2590 last_return=-189.2 (+1 eps) [A2C][sync] it=21129 steps= 950805 (+ 45) avg10=-195.73 loss=6308062208.000 pg=-0.000 vf=11469203456.000 H=0.000 gn=7278251008.000 [worker 0] episodes_seen=2630 last_return=-220.4 (+1 eps) [worker 2] episodes_seen=2600 last_return=-203.2 (+1 eps) [worker 1] episodes_seen=2580 last_return=-213.5 (+1 eps) [worker 0] episodes_seen=2640 last_return=-163.3 (+1 eps) [worker 2] episodes_seen=2610 last_return=-189.8 (+1 eps) [worker 1] episodes_seen=2590 last_return=-143.6 (+1 eps) [worker 0] episodes_seen=2650 
last_return=-230.7 (+1 eps) [worker 2] episodes_seen=2620 last_return=-188.8 (+1 eps) [worker 1] episodes_seen=2600 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2660 last_return=-121.6 (+1 eps) [worker 2] episodes_seen=2630 last_return=-126.6 (+1 eps) [worker 1] episodes_seen=2610 last_return=-160.9 (+1 eps) [worker 0] episodes_seen=2670 last_return=-172.9 (+1 eps) [worker 2] episodes_seen=2640 last_return=-190.0 (+1 eps) [worker 1] episodes_seen=2620 last_return=-190.6 (+1 eps) [worker 0] episodes_seen=2680 last_return=-171.2 (+1 eps) [worker 2] episodes_seen=2650 last_return=-165.7 (+1 eps) [worker 1] episodes_seen=2630 last_return=-207.5 (+1 eps) [worker 0] episodes_seen=2690 last_return=-215.3 (+1 eps) [worker 2] episodes_seen=2660 last_return=-256.5 (+1 eps) [worker 1] episodes_seen=2640 last_return=-124.4 (+1 eps) [worker 0] episodes_seen=2700 last_return=-181.4 (+1 eps) [worker 2] episodes_seen=2670 last_return=-124.8 (+1 eps) [worker 1] episodes_seen=2650 last_return=-204.2 (+1 eps) [worker 0] episodes_seen=2710 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=2680 last_return=-176.6 (+1 eps) [worker 1] episodes_seen=2660 last_return=-121.4 (+1 eps) [worker 0] episodes_seen=2720 last_return=-166.8 (+1 eps) [worker 2] episodes_seen=2690 last_return=-169.1 (+1 eps) [worker 0] episodes_seen=2730 last_return=-167.1 (+1 eps) [worker 1] episodes_seen=2670 last_return=-200.4 (+1 eps) [worker 2] episodes_seen=2700 last_return=-252.8 (+1 eps) [worker 0] episodes_seen=2740 last_return=-159.6 (+1 eps) [worker 1] episodes_seen=2680 last_return=-187.5 (+1 eps) [worker 2] episodes_seen=2710 last_return=-122.0 (+1 eps) [worker 0] episodes_seen=2750 last_return=-183.2 (+1 eps) [worker 1] episodes_seen=2690 last_return=-137.3 (+1 eps) [worker 2] episodes_seen=2720 last_return=-158.8 (+1 eps) [A2C][sync] it=22223 steps= 1000035 (+ 45) avg10=-156.48 loss=4386492928.000 pg=-0.000 vf=7975441920.000 H=0.000 gn=4994435072.000 [Checkpoint] Saved self-describing 
checkpoint → part2_artifacts/checkpoints/a2c_run18_seed1227.pth [A2C][sync] done: steps=1000035 time=1200.7s avg10=-156.48
[Run run18_seed1227] checkpoint: part2_artifacts/checkpoints/a2c_run18_seed1227.pth [Run run18_seed1227] training plot (tail 500): part2_artifacts/train_curve_run18_seed1227.png [Run run18_seed1227] training plot (full): part2_artifacts/train_curve_full_run18_seed1227.png [Run run18_seed1227] per-worker plot: part2_artifacts/train_curve_workers_run18_seed1227.png [Run run18_seed1227] workers-average plot: part2_artifacts/train_curve_workers_avg_run18_seed1227.png
[Eval run18_seed1227] mean=-693.82 std=113.17 min=-927.28 max=-562.23 [Eval run18_seed1227] CSV: part2_artifacts/eval10_run18_seed1227.csv [Eval run18_seed1227] plot: part2_artifacts/eval10_run18_seed1227.png [Best] ep=6 return=-562.23 seed=1233 [Video run18_seed1227] episode return=-562.23 [Video run18_seed1227] saved under: part2_artifacts/videos run18_seed1227 | mean=-693.8±113.2 | best_ep=6, best_ret=-562.2
Run#19
# --- Run 19 ------------------------------------------------------------------
# Train one A2C model, evaluate the saved checkpoint, record a video of the
# best evaluation episode, then print a one-line summary.
# train_once / evaluate_10 / record_best_from_eval are defined earlier in the
# notebook; the notes on their arguments below are hedged accordingly.
run_id = f"run19_seed{SEED}"

model, logs, paths = train_once(
    run_id=run_id,
    # Rollout shape and budget: with 4 workers and T=30 the training log for
    # this run reports +120 env steps per sync iteration.
    n_workers=4, T=30, total_env_steps=1_400_000,
    # Optimiser settings (presumably learning rate and gradient-norm clip
    # threshold -- confirm against train_once).
    lr=2e-4, max_grad_norm=0.5,
    # Discount factor and loss-term weights (assumed from the names; verify
    # in train_once).
    gamma=0.99, value_coef=0.65, entropy_coef=0.005,
    # The "[A2C][sync]" progress lines in this run's output appear roughly
    # every 70k env steps.
    log_every=70_000,
)

# Score the checkpoint written by training. Only the metrics mapping is used
# here ('mean' and 'std' are read below); the second return value is discarded.
metrics, _ = evaluate_10(run_id, paths.ckpt_path)

# Returns the video location plus the index and return of the best episode.
video_dir, best_idx, best_ret = record_best_from_eval(run_id, paths.ckpt_path)

print(f"{run_id} | mean={metrics['mean']:.1f}±{metrics['std']:.1f} | best_ep={best_idx}, best_ret={best_ret:.1f}")
[Run run19_seed1227] starting training… [A2C][sync] start: workers=4, T=30, target_steps=1400000, mp=fork [A2C][sync] it= 1 steps= 120 (+120) avg10= nan loss=91.747 pg=-0.000 vf=141.160 H=1.386 gn=33.784 [worker 3] episodes_seen=10 last_return=-73.0 (+1 eps) [worker 2] episodes_seen=10 last_return=-149.8 (+1 eps) [worker 1] episodes_seen=10 last_return=-81.9 (+1 eps) [worker 0] episodes_seen=10 last_return=-102.1 (+1 eps) [worker 3] episodes_seen=20 last_return=47.3 (+1 eps) [worker 0] episodes_seen=20 last_return=-69.9 (+1 eps) [worker 2] episodes_seen=20 last_return=-66.7 (+1 eps) [worker 1] episodes_seen=20 last_return=-61.3 (+1 eps) [worker 3] episodes_seen=30 last_return=-127.2 (+1 eps) [worker 1] episodes_seen=30 last_return=-42.8 (+1 eps) [worker 0] episodes_seen=30 last_return=-191.3 (+1 eps) [worker 2] episodes_seen=30 last_return=-199.0 (+1 eps) [worker 3] episodes_seen=40 last_return=-103.2 (+1 eps) [worker 0] episodes_seen=40 last_return=0.9 (+1 eps) [worker 1] episodes_seen=40 last_return=-196.3 (+1 eps) [worker 2] episodes_seen=40 last_return=-97.4 (+1 eps) [worker 3] episodes_seen=50 last_return=-313.2 (+1 eps) [worker 0] episodes_seen=50 last_return=-208.8 (+1 eps) [worker 2] episodes_seen=50 last_return=-228.1 (+1 eps) [worker 1] episodes_seen=50 last_return=-200.5 (+1 eps) [worker 3] episodes_seen=60 last_return=-268.5 (+1 eps) [worker 0] episodes_seen=60 last_return=-218.6 (+1 eps) [worker 2] episodes_seen=60 last_return=-203.7 (+1 eps) [worker 1] episodes_seen=60 last_return=-86.9 (+1 eps) [worker 3] episodes_seen=70 last_return=-158.7 (+1 eps) [worker 0] episodes_seen=70 last_return=-122.2 (+1 eps) [worker 2] episodes_seen=70 last_return=-196.0 (+1 eps) [worker 1] episodes_seen=70 last_return=-243.8 (+1 eps) [worker 3] episodes_seen=80 last_return=-108.5 (+1 eps) [worker 0] episodes_seen=80 last_return=-262.2 (+1 eps) [worker 1] episodes_seen=80 last_return=-214.2 (+1 eps) [worker 2] episodes_seen=80 last_return=-87.8 (+1 eps) [worker 3] 
episodes_seen=90 last_return=-224.5 (+1 eps) [worker 0] episodes_seen=90 last_return=-176.0 (+1 eps) [worker 1] episodes_seen=90 last_return=-243.1 (+1 eps) [worker 2] episodes_seen=90 last_return=-153.5 (+1 eps) [worker 3] episodes_seen=100 last_return=-245.0 (+1 eps) [worker 1] episodes_seen=100 last_return=-319.0 (+1 eps) [worker 0] episodes_seen=100 last_return=-239.3 (+1 eps) [worker 2] episodes_seen=100 last_return=-197.4 (+1 eps) [worker 3] episodes_seen=110 last_return=-91.0 (+1 eps) [worker 1] episodes_seen=110 last_return=-153.8 (+1 eps) [worker 0] episodes_seen=110 last_return=-156.4 (+1 eps) [worker 2] episodes_seen=110 last_return=-144.1 (+1 eps) [worker 3] episodes_seen=120 last_return=-138.0 (+1 eps) [worker 1] episodes_seen=120 last_return=-155.7 (+1 eps) [worker 0] episodes_seen=120 last_return=-194.8 (+1 eps) [worker 2] episodes_seen=120 last_return=-312.7 (+1 eps) [worker 3] episodes_seen=130 last_return=-173.0 (+1 eps) [worker 1] episodes_seen=130 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=130 last_return=-337.2 (+1 eps) [worker 2] episodes_seen=130 last_return=-190.2 (+1 eps) [worker 0] episodes_seen=140 last_return=-168.1 (+1 eps) [worker 1] episodes_seen=140 last_return=-197.1 (+1 eps) [worker 3] episodes_seen=140 last_return=-13.0 (+1 eps) [worker 2] episodes_seen=140 last_return=-158.9 (+1 eps) [worker 1] episodes_seen=150 last_return=-136.7 (+1 eps) [worker 0] episodes_seen=150 last_return=-315.8 (+1 eps) [worker 3] episodes_seen=150 last_return=-375.2 (+1 eps) [worker 2] episodes_seen=150 last_return=-114.0 (+1 eps) [worker 0] episodes_seen=160 last_return=-106.9 (+1 eps) [worker 3] episodes_seen=160 last_return=-289.6 (+1 eps) [worker 1] episodes_seen=160 last_return=-277.3 (+1 eps) [worker 2] episodes_seen=160 last_return=-117.2 (+1 eps) [worker 0] episodes_seen=170 last_return=-240.7 (+1 eps) [worker 3] episodes_seen=170 last_return=-138.4 (+1 eps) [worker 1] episodes_seen=170 last_return=-129.5 (+1 eps) [worker 2] 
episodes_seen=170 last_return=-131.6 (+1 eps) [worker 0] episodes_seen=180 last_return=-363.5 (+1 eps) [worker 3] episodes_seen=180 last_return=-203.5 (+1 eps) [worker 1] episodes_seen=180 last_return=-165.9 (+1 eps) [worker 2] episodes_seen=180 last_return=-262.4 (+1 eps) [A2C][sync] it= 585 steps= 70200 (+120) avg10=-257.31 loss=1785.770 pg=0.010 vf=2747.332 H=1.061 gn=1259.942 [worker 3] episodes_seen=190 last_return=-120.2 (+1 eps) [worker 1] episodes_seen=190 last_return=-228.9 (+1 eps) [worker 2] episodes_seen=190 last_return=-319.7 (+1 eps) [worker 0] episodes_seen=190 last_return=-220.1 (+1 eps) [worker 3] episodes_seen=200 last_return=-290.1 (+1 eps) [worker 1] episodes_seen=200 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=200 last_return=-116.2 (+1 eps) [worker 0] episodes_seen=200 last_return=-155.1 (+1 eps) [worker 3] episodes_seen=210 last_return=-309.5 (+1 eps) [worker 2] episodes_seen=210 last_return=-177.3 (+1 eps) [worker 1] episodes_seen=210 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=210 last_return=-220.9 (+1 eps) [worker 3] episodes_seen=220 last_return=-93.8 (+1 eps) [worker 0] episodes_seen=220 last_return=-313.0 (+1 eps) [worker 2] episodes_seen=220 last_return=-290.7 (+1 eps) [worker 1] episodes_seen=220 last_return=-133.7 (+1 eps) [worker 3] episodes_seen=230 last_return=-360.6 (+1 eps) [worker 2] episodes_seen=230 last_return=-198.5 (+1 eps) [worker 0] episodes_seen=230 last_return=-231.3 (+1 eps) [worker 1] episodes_seen=230 last_return=-248.6 (+1 eps) [worker 3] episodes_seen=240 last_return=-224.8 (+1 eps) [worker 2] episodes_seen=240 last_return=-192.8 (+1 eps) [worker 0] episodes_seen=240 last_return=-252.9 (+1 eps) [worker 1] episodes_seen=240 last_return=-152.2 (+1 eps) [worker 3] episodes_seen=250 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=250 last_return=-37.4 (+1 eps) [worker 1] episodes_seen=250 last_return=-480.3 (+1 eps) [worker 0] episodes_seen=250 last_return=-112.4 (+1 eps) [worker 3] 
episodes_seen=260 last_return=-491.0 (+1 eps) [worker 2] episodes_seen=260 last_return=-330.6 (+1 eps) [worker 1] episodes_seen=260 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=260 last_return=-246.2 (+1 eps) [worker 3] episodes_seen=270 last_return=-210.4 (+1 eps) [worker 2] episodes_seen=270 last_return=-150.5 (+1 eps) [worker 0] episodes_seen=270 last_return=-189.0 (+1 eps) [worker 1] episodes_seen=270 last_return=-245.8 (+1 eps) [worker 3] episodes_seen=280 last_return=-96.4 (+1 eps) [worker 2] episodes_seen=280 last_return=-129.4 (+1 eps) [worker 1] episodes_seen=280 last_return=-178.9 (+1 eps) [worker 0] episodes_seen=280 last_return=-139.3 (+1 eps) [worker 2] episodes_seen=290 last_return=-177.6 (+1 eps) [worker 3] episodes_seen=290 last_return=-126.0 (+1 eps) [worker 0] episodes_seen=290 last_return=-110.9 (+1 eps) [worker 1] episodes_seen=290 last_return=-133.5 (+1 eps) [worker 3] episodes_seen=300 last_return=-151.8 (+1 eps) [worker 2] episodes_seen=300 last_return=-261.6 (+1 eps) [worker 1] episodes_seen=300 last_return=-264.6 (+1 eps) [worker 0] episodes_seen=300 last_return=-216.2 (+1 eps) [worker 2] episodes_seen=310 last_return=-289.7 (+1 eps) [worker 3] episodes_seen=310 last_return=-126.3 (+1 eps) [worker 1] episodes_seen=310 last_return=-176.6 (+1 eps) [worker 0] episodes_seen=310 last_return=-160.0 (+1 eps) [worker 2] episodes_seen=320 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=320 last_return=-103.6 (+1 eps) [worker 1] episodes_seen=320 last_return=-225.9 (+1 eps) [worker 0] episodes_seen=320 last_return=-127.9 (+1 eps) [worker 2] episodes_seen=330 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=330 last_return=-181.9 (+1 eps) [worker 0] episodes_seen=330 last_return=-187.1 (+1 eps) [worker 1] episodes_seen=330 last_return=-239.5 (+1 eps) [worker 2] episodes_seen=340 last_return=-114.4 (+1 eps) [worker 3] episodes_seen=340 last_return=-232.9 (+1 eps) [worker 0] episodes_seen=340 last_return=-140.5 (+1 eps) [worker 1] 
episodes_seen=340 last_return=-209.0 (+1 eps) [worker 3] episodes_seen=350 last_return=-339.6 (+1 eps) [worker 2] episodes_seen=350 last_return=-149.0 (+1 eps) [worker 0] episodes_seen=350 last_return=-320.7 (+1 eps) [worker 1] episodes_seen=350 last_return=-147.5 (+1 eps) [worker 2] episodes_seen=360 last_return=-116.7 (+1 eps) [worker 3] episodes_seen=360 last_return=-276.3 (+1 eps) [worker 0] episodes_seen=360 last_return=-141.8 (+1 eps) [worker 1] episodes_seen=360 last_return=-151.9 (+1 eps) [worker 2] episodes_seen=370 last_return=-164.4 (+1 eps) [worker 0] episodes_seen=370 last_return=-103.6 (+1 eps) [worker 3] episodes_seen=370 last_return=-287.5 (+1 eps) [worker 1] episodes_seen=370 last_return=-130.6 (+1 eps) [A2C][sync] it= 1169 steps= 140280 (+120) avg10=-240.72 loss=7262.731 pg=0.027 vf=11173.396 H=0.816 gn=40911.352 [worker 0] episodes_seen=380 last_return=-127.5 (+1 eps) [worker 3] episodes_seen=380 last_return=-426.4 (+1 eps) [worker 2] episodes_seen=380 last_return=-187.4 (+1 eps) [worker 1] episodes_seen=380 last_return=-148.5 (+1 eps) [worker 3] episodes_seen=390 last_return=-120.8 (+1 eps) [worker 0] episodes_seen=390 last_return=-111.2 (+1 eps) [worker 2] episodes_seen=390 last_return=-168.6 (+1 eps) [worker 1] episodes_seen=390 last_return=-203.5 (+1 eps) [worker 2] episodes_seen=400 last_return=-236.8 (+1 eps) [worker 0] episodes_seen=400 last_return=-296.4 (+1 eps) [worker 3] episodes_seen=400 last_return=-126.4 (+1 eps) [worker 1] episodes_seen=400 last_return=-114.8 (+1 eps) [worker 2] episodes_seen=410 last_return=-203.0 (+1 eps) [worker 0] episodes_seen=410 last_return=-88.6 (+1 eps) [worker 3] episodes_seen=410 last_return=-281.8 (+1 eps) [worker 1] episodes_seen=410 last_return=-220.2 (+1 eps) [worker 0] episodes_seen=420 last_return=-271.0 (+1 eps) [worker 3] episodes_seen=420 last_return=-193.1 (+1 eps) [worker 2] episodes_seen=420 last_return=-113.5 (+1 eps) [worker 1] episodes_seen=420 last_return=-137.9 (+1 eps) [worker 0] 
episodes_seen=430 last_return=-166.1 (+1 eps) [worker 3] episodes_seen=430 last_return=-140.5 (+1 eps) [worker 2] episodes_seen=430 last_return=-235.3 (+1 eps) [worker 1] episodes_seen=430 last_return=-159.9 (+1 eps) [worker 0] episodes_seen=440 last_return=-360.8 (+1 eps) [worker 3] episodes_seen=440 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=440 last_return=-127.9 (+1 eps) [worker 1] episodes_seen=440 last_return=-72.6 (+1 eps) [worker 0] episodes_seen=450 last_return=-295.2 (+1 eps) [worker 3] episodes_seen=450 last_return=-129.1 (+1 eps) [worker 2] episodes_seen=450 last_return=-55.0 (+1 eps) [worker 1] episodes_seen=450 last_return=-84.0 (+1 eps) [worker 0] episodes_seen=460 last_return=-282.7 (+1 eps) [worker 3] episodes_seen=460 last_return=-249.2 (+1 eps) [worker 1] episodes_seen=460 last_return=-131.3 (+1 eps) [worker 2] episodes_seen=460 last_return=-105.6 (+1 eps) [worker 0] episodes_seen=470 last_return=-303.8 (+1 eps) [worker 3] episodes_seen=470 last_return=-114.5 (+1 eps) [worker 2] episodes_seen=470 last_return=-170.2 (+1 eps) [worker 1] episodes_seen=470 last_return=-114.9 (+1 eps) [worker 0] episodes_seen=480 last_return=-51.7 (+1 eps) [worker 1] episodes_seen=480 last_return=-255.4 (+1 eps) [worker 2] episodes_seen=480 last_return=-158.5 (+1 eps) [worker 0] episodes_seen=490 last_return=-182.3 (+1 eps) [worker 3] episodes_seen=480 last_return=-127.6 (+1 eps) [worker 1] episodes_seen=490 last_return=-186.1 (+1 eps) [worker 2] episodes_seen=490 last_return=-226.3 (+1 eps) [worker 0] episodes_seen=500 last_return=-196.8 (+1 eps) [worker 3] episodes_seen=490 last_return=-247.4 (+1 eps) [worker 0] episodes_seen=510 last_return=68.6 (+1 eps) [worker 1] episodes_seen=500 last_return=-208.9 (+1 eps) [worker 2] episodes_seen=500 last_return=-108.5 (+1 eps) [worker 3] episodes_seen=500 last_return=-82.7 (+1 eps) [worker 0] episodes_seen=520 last_return=-226.1 (+1 eps) [worker 1] episodes_seen=510 last_return=-72.2 (+1 eps) [worker 2] 
episodes_seen=510 last_return=-73.3 (+1 eps) [worker 3] episodes_seen=510 last_return=-270.0 (+1 eps) [worker 0] episodes_seen=530 last_return=-154.1 (+1 eps) [worker 1] episodes_seen=520 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=520 last_return=-282.6 (+1 eps) [worker 3] episodes_seen=520 last_return=-267.6 (+1 eps) [worker 1] episodes_seen=530 last_return=-246.8 (+1 eps) [worker 0] episodes_seen=540 last_return=-264.8 (+1 eps) [worker 2] episodes_seen=530 last_return=-68.9 (+1 eps) [worker 3] episodes_seen=530 last_return=-308.4 (+1 eps) [worker 0] episodes_seen=550 last_return=-379.7 (+1 eps) [worker 1] episodes_seen=540 last_return=-148.5 (+1 eps) [worker 2] episodes_seen=540 last_return=-165.8 (+1 eps) [worker 3] episodes_seen=540 last_return=-241.7 (+1 eps) [worker 1] episodes_seen=550 last_return=-197.2 (+1 eps) [worker 0] episodes_seen=560 last_return=-182.6 (+1 eps) [worker 2] episodes_seen=550 last_return=-41.1 (+1 eps) [worker 3] episodes_seen=550 last_return=-153.4 (+1 eps) [worker 1] episodes_seen=560 last_return=-39.9 (+1 eps) [A2C][sync] it= 1753 steps= 210360 (+120) avg10=-162.08 loss=1065.896 pg=0.001 vf=1639.846 H=1.099 gn=3626.057 [worker 0] episodes_seen=570 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=560 last_return=-229.8 (+1 eps) [worker 2] episodes_seen=560 last_return=-118.1 (+1 eps) [worker 1] episodes_seen=570 last_return=-91.4 (+1 eps) [worker 0] episodes_seen=580 last_return=-101.0 (+1 eps) [worker 3] episodes_seen=570 last_return=-118.3 (+1 eps) [worker 2] episodes_seen=570 last_return=-187.4 (+1 eps) [worker 1] episodes_seen=580 last_return=-229.2 (+1 eps) [worker 0] episodes_seen=590 last_return=-186.9 (+1 eps) [worker 3] episodes_seen=580 last_return=-103.3 (+1 eps) [worker 2] episodes_seen=580 last_return=-78.6 (+1 eps) [worker 1] episodes_seen=590 last_return=-79.7 (+1 eps) [worker 0] episodes_seen=600 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=590 last_return=-89.2 (+1 eps) [worker 2] 
episodes_seen=590 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=600 last_return=-92.1 (+1 eps) [worker 0] episodes_seen=610 last_return=-138.7 (+1 eps) [worker 3] episodes_seen=600 last_return=-126.6 (+1 eps) [worker 2] episodes_seen=600 last_return=-84.3 (+1 eps) [worker 1] episodes_seen=610 last_return=-80.5 (+1 eps) [worker 0] episodes_seen=620 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=610 last_return=-53.1 (+1 eps) [worker 2] episodes_seen=610 last_return=65.2 (+1 eps) [worker 1] episodes_seen=620 last_return=-103.0 (+1 eps) [worker 0] episodes_seen=630 last_return=-87.1 (+1 eps) [worker 3] episodes_seen=620 last_return=-87.6 (+1 eps) [worker 2] episodes_seen=620 last_return=-105.5 (+1 eps) [worker 1] episodes_seen=630 last_return=-100.8 (+1 eps) [worker 0] episodes_seen=640 last_return=-98.7 (+1 eps) [worker 3] episodes_seen=630 last_return=-122.3 (+1 eps) [worker 2] episodes_seen=630 last_return=-130.1 (+1 eps) [worker 1] episodes_seen=640 last_return=-132.3 (+1 eps) [worker 0] episodes_seen=650 last_return=-142.0 (+1 eps) [worker 3] episodes_seen=640 last_return=-85.7 (+1 eps) [worker 2] episodes_seen=640 last_return=-81.3 (+1 eps) [worker 1] episodes_seen=650 last_return=-144.7 (+1 eps) [worker 0] episodes_seen=660 last_return=-91.6 (+1 eps) [worker 3] episodes_seen=650 last_return=-125.6 (+1 eps) [worker 2] episodes_seen=650 last_return=-84.5 (+1 eps) [worker 0] episodes_seen=670 last_return=-151.1 (+1 eps) [worker 1] episodes_seen=660 last_return=-151.5 (+1 eps) [worker 3] episodes_seen=660 last_return=-95.7 (+1 eps) [worker 2] episodes_seen=660 last_return=-71.6 (+1 eps) [worker 0] episodes_seen=680 last_return=-90.4 (+1 eps) [worker 1] episodes_seen=670 last_return=-80.4 (+1 eps) [worker 3] episodes_seen=670 last_return=-94.0 (+1 eps) [worker 2] episodes_seen=670 last_return=-104.4 (+1 eps) [worker 0] episodes_seen=690 last_return=-149.0 (+1 eps) [worker 1] episodes_seen=680 last_return=-208.5 (+1 eps) [worker 3] episodes_seen=680 
last_return=-97.0 (+1 eps) [worker 2] episodes_seen=680 last_return=-98.3 (+1 eps) [worker 1] episodes_seen=690 last_return=-95.5 (+1 eps) [worker 3] episodes_seen=690 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=700 last_return=-116.3 (+1 eps) [worker 2] episodes_seen=690 last_return=-61.0 (+1 eps) [worker 1] episodes_seen=700 last_return=-92.3 (+1 eps) [worker 3] episodes_seen=700 last_return=-115.4 (+1 eps) [worker 0] episodes_seen=710 last_return=-106.3 (+1 eps) [worker 2] episodes_seen=700 last_return=-96.2 (+1 eps) [worker 1] episodes_seen=710 last_return=-99.9 (+1 eps) [worker 3] episodes_seen=710 last_return=-84.0 (+1 eps) [worker 0] episodes_seen=720 last_return=-82.7 (+1 eps) [worker 2] episodes_seen=710 last_return=-295.2 (+1 eps) [worker 3] episodes_seen=720 last_return=-47.8 (+1 eps) [worker 0] episodes_seen=730 last_return=-67.6 (+1 eps) [worker 1] episodes_seen=720 last_return=-131.5 (+1 eps) [worker 2] episodes_seen=720 last_return=-311.4 (+1 eps) [worker 3] episodes_seen=730 last_return=-95.4 (+1 eps) [worker 0] episodes_seen=740 last_return=-175.7 (+1 eps) [worker 1] episodes_seen=730 last_return=-148.3 (+1 eps) [worker 2] episodes_seen=730 last_return=-209.8 (+1 eps) [worker 3] episodes_seen=740 last_return=-225.1 (+1 eps) [worker 0] episodes_seen=750 last_return=-46.4 (+1 eps) [worker 1] episodes_seen=740 last_return=-106.7 (+1 eps) [worker 2] episodes_seen=740 last_return=-256.7 (+1 eps) [worker 3] episodes_seen=750 last_return=-147.9 (+1 eps) [worker 0] episodes_seen=760 last_return=-122.1 (+1 eps) [worker 1] episodes_seen=750 last_return=-240.5 (+1 eps) [worker 2] episodes_seen=750 last_return=-147.5 (+1 eps) [A2C][sync] it= 2337 steps= 280440 (+120) avg10=-106.74 loss=359.063 pg=-0.012 vf=552.431 H=1.155 gn=904.927 [worker 3] episodes_seen=760 last_return=-166.5 (+1 eps) [worker 1] episodes_seen=760 last_return=-126.0 (+1 eps) [worker 0] episodes_seen=770 last_return=-177.5 (+1 eps) [worker 2] episodes_seen=760 last_return=-100.0 (+1 
eps) [worker 3] episodes_seen=770 last_return=-111.3 (+1 eps) [worker 1] episodes_seen=770 last_return=-76.8 (+1 eps) [worker 0] episodes_seen=780 last_return=52.2 (+1 eps) [worker 2] episodes_seen=770 last_return=-131.1 (+1 eps) [worker 3] episodes_seen=780 last_return=-268.1 (+1 eps) [worker 1] episodes_seen=780 last_return=-69.6 (+1 eps) [worker 0] episodes_seen=790 last_return=-251.6 (+1 eps) [worker 3] episodes_seen=790 last_return=-250.9 (+1 eps) [worker 2] episodes_seen=780 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=790 last_return=-204.4 (+1 eps) [worker 0] episodes_seen=800 last_return=-233.4 (+1 eps) [worker 2] episodes_seen=790 last_return=-256.3 (+1 eps) [worker 3] episodes_seen=800 last_return=-248.9 (+1 eps) [worker 1] episodes_seen=800 last_return=-371.3 (+1 eps) [worker 0] episodes_seen=810 last_return=-156.5 (+1 eps) [worker 2] episodes_seen=800 last_return=-206.6 (+1 eps) [worker 0] episodes_seen=820 last_return=-145.8 (+1 eps) [worker 1] episodes_seen=810 last_return=-158.9 (+1 eps) [worker 3] episodes_seen=810 last_return=-96.4 (+1 eps) [worker 2] episodes_seen=810 last_return=-72.7 (+1 eps) [worker 0] episodes_seen=830 last_return=46.7 (+1 eps) [worker 1] episodes_seen=820 last_return=-140.8 (+1 eps) [worker 3] episodes_seen=820 last_return=-72.7 (+1 eps) [worker 2] episodes_seen=820 last_return=-138.1 (+1 eps) [worker 0] episodes_seen=840 last_return=-56.3 (+1 eps) [worker 1] episodes_seen=830 last_return=-362.8 (+1 eps) [worker 3] episodes_seen=830 last_return=-163.9 (+1 eps) [worker 2] episodes_seen=830 last_return=-156.4 (+1 eps) [worker 0] episodes_seen=850 last_return=-158.3 (+1 eps) [worker 3] episodes_seen=840 last_return=-215.4 (+1 eps) [worker 1] episodes_seen=840 last_return=-277.5 (+1 eps) [worker 2] episodes_seen=840 last_return=-312.0 (+1 eps) [worker 0] episodes_seen=860 last_return=-172.7 (+1 eps) [worker 3] episodes_seen=850 last_return=-264.0 (+1 eps) [worker 1] episodes_seen=850 last_return=-115.2 (+1 eps) [worker 
2] episodes_seen=850 last_return=-78.4 (+1 eps) [worker 0] episodes_seen=870 last_return=-131.8 (+1 eps) [worker 3] episodes_seen=860 last_return=-96.2 (+1 eps) [worker 1] episodes_seen=860 last_return=-204.3 (+1 eps) [worker 2] episodes_seen=860 last_return=-74.0 (+1 eps) [worker 3] episodes_seen=870 last_return=-131.5 (+1 eps) [worker 0] episodes_seen=880 last_return=-223.8 (+1 eps) [worker 1] episodes_seen=870 last_return=-97.1 (+1 eps) [worker 2] episodes_seen=870 last_return=-118.8 (+1 eps) [worker 3] episodes_seen=880 last_return=-138.4 (+1 eps) [worker 0] episodes_seen=890 last_return=-98.0 (+1 eps) [worker 1] episodes_seen=880 last_return=-220.3 (+1 eps) [worker 2] episodes_seen=880 last_return=-91.5 (+1 eps) [worker 3] episodes_seen=890 last_return=-129.0 (+1 eps) [worker 0] episodes_seen=900 last_return=-67.3 (+1 eps) [worker 1] episodes_seen=890 last_return=-118.3 (+1 eps) [worker 2] episodes_seen=890 last_return=-83.6 (+1 eps) [worker 3] episodes_seen=900 last_return=-206.2 (+1 eps) [worker 0] episodes_seen=910 last_return=-189.7 (+1 eps) [worker 1] episodes_seen=900 last_return=-205.6 (+1 eps) [worker 2] episodes_seen=900 last_return=-100.2 (+1 eps) [worker 3] episodes_seen=910 last_return=-135.6 (+1 eps) [worker 0] episodes_seen=920 last_return=-250.0 (+1 eps) [worker 1] episodes_seen=910 last_return=-154.8 (+1 eps) [worker 2] episodes_seen=910 last_return=-89.3 (+1 eps) [worker 3] episodes_seen=920 last_return=-160.2 (+1 eps) [worker 0] episodes_seen=930 last_return=-127.1 (+1 eps) [worker 1] episodes_seen=920 last_return=-183.0 (+1 eps) [worker 2] episodes_seen=920 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=930 last_return=-261.7 (+1 eps) [worker 1] episodes_seen=930 last_return=-52.9 (+1 eps) [worker 0] episodes_seen=940 last_return=-89.7 (+1 eps) [worker 2] episodes_seen=930 last_return=-74.2 (+1 eps) [worker 3] episodes_seen=940 last_return=-115.4 (+1 eps) [worker 1] episodes_seen=940 last_return=-84.8 (+1 eps) [worker 0] 
episodes_seen=950 last_return=-108.6 (+1 eps) [worker 2] episodes_seen=940 last_return=-219.5 (+1 eps) [worker 3] episodes_seen=950 last_return=-179.5 (+1 eps) [worker 1] episodes_seen=950 last_return=-87.5 (+1 eps) [worker 0] episodes_seen=960 last_return=-149.8 (+1 eps) [worker 2] episodes_seen=950 last_return=-176.8 (+1 eps) [worker 3] episodes_seen=960 last_return=-98.4 (+1 eps) [A2C][sync] it= 2921 steps= 350520 (+120) avg10=-106.73 loss=431.459 pg=-0.069 vf=663.900 H=1.346 gn=1811.322 [worker 1] episodes_seen=960 last_return=-77.2 (+1 eps) [worker 0] episodes_seen=970 last_return=-53.9 (+1 eps) [worker 2] episodes_seen=960 last_return=-77.9 (+1 eps) [worker 3] episodes_seen=970 last_return=-57.8 (+1 eps) [worker 1] episodes_seen=970 last_return=-83.9 (+1 eps) [worker 0] episodes_seen=980 last_return=-84.9 (+1 eps) [worker 2] episodes_seen=970 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=980 last_return=-71.1 (+1 eps) [worker 1] episodes_seen=980 last_return=-97.0 (+1 eps) [worker 0] episodes_seen=990 last_return=-84.8 (+1 eps) [worker 2] episodes_seen=980 last_return=-104.4 (+1 eps) [worker 3] episodes_seen=990 last_return=-86.4 (+1 eps) [worker 1] episodes_seen=990 last_return=-77.0 (+1 eps) [worker 0] episodes_seen=1000 last_return=-103.9 (+1 eps) [worker 2] episodes_seen=990 last_return=-80.9 (+1 eps) [worker 3] episodes_seen=1000 last_return=-83.9 (+1 eps) [worker 1] episodes_seen=1000 last_return=-111.4 (+1 eps) [worker 0] episodes_seen=1010 last_return=-78.9 (+1 eps) [worker 2] episodes_seen=1000 last_return=-92.9 (+1 eps) [worker 3] episodes_seen=1010 last_return=-68.1 (+1 eps) [worker 1] episodes_seen=1010 last_return=-81.5 (+1 eps) [worker 0] episodes_seen=1020 last_return=-78.0 (+1 eps) [worker 2] episodes_seen=1010 last_return=-130.5 (+1 eps) [worker 3] episodes_seen=1020 last_return=-180.8 (+1 eps) [worker 1] episodes_seen=1020 last_return=-68.2 (+1 eps) [worker 0] episodes_seen=1030 last_return=-108.9 (+1 eps) [worker 2] 
episodes_seen=1020 last_return=-250.2 (+1 eps) [worker 3] episodes_seen=1030 last_return=-185.3 (+1 eps) [worker 1] episodes_seen=1030 last_return=-58.6 (+1 eps) [worker 0] episodes_seen=1040 last_return=-102.9 (+1 eps) [worker 2] episodes_seen=1030 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=1040 last_return=-99.3 (+1 eps) [worker 1] episodes_seen=1040 last_return=-99.8 (+1 eps) [worker 0] episodes_seen=1050 last_return=-88.1 (+1 eps) [worker 2] episodes_seen=1040 last_return=-87.7 (+1 eps) [worker 3] episodes_seen=1050 last_return=-136.9 (+1 eps) [worker 1] episodes_seen=1050 last_return=-71.7 (+1 eps) [worker 0] episodes_seen=1060 last_return=-84.7 (+1 eps) [worker 2] episodes_seen=1050 last_return=-94.2 (+1 eps) [worker 1] episodes_seen=1060 last_return=-39.5 (+1 eps) [worker 3] episodes_seen=1060 last_return=122.7 (+1 eps) [worker 0] episodes_seen=1070 last_return=-164.0 (+1 eps) [worker 2] episodes_seen=1060 last_return=-93.2 (+1 eps) [worker 1] episodes_seen=1070 last_return=-91.1 (+1 eps) [worker 3] episodes_seen=1070 last_return=-56.5 (+1 eps) [worker 0] episodes_seen=1080 last_return=-92.8 (+1 eps) [worker 2] episodes_seen=1070 last_return=-132.2 (+1 eps) [worker 3] episodes_seen=1080 last_return=-134.5 (+1 eps) [worker 0] episodes_seen=1090 last_return=-96.5 (+1 eps) [worker 1] episodes_seen=1080 last_return=9.6 (+1 eps) [worker 2] episodes_seen=1080 last_return=-31.1 (+1 eps) [worker 3] episodes_seen=1090 last_return=-96.4 (+1 eps) [worker 0] episodes_seen=1100 last_return=-103.5 (+1 eps) [worker 1] episodes_seen=1090 last_return=-98.1 (+1 eps) [worker 2] episodes_seen=1090 last_return=-80.5 (+1 eps) [worker 3] episodes_seen=1100 last_return=-107.9 (+1 eps) [worker 0] episodes_seen=1110 last_return=-80.1 (+1 eps) [worker 1] episodes_seen=1100 last_return=-84.1 (+1 eps) [worker 2] episodes_seen=1100 last_return=-120.6 (+1 eps) [worker 3] episodes_seen=1110 last_return=-107.6 (+1 eps) [worker 0] episodes_seen=1120 last_return=-136.4 (+1 eps) 
[worker 1] episodes_seen=1110 last_return=-74.6 (+1 eps) [worker 2] episodes_seen=1110 last_return=-124.3 (+1 eps) [worker 3] episodes_seen=1120 last_return=-91.9 (+1 eps) [worker 1] episodes_seen=1120 last_return=-131.4 (+1 eps) [worker 3] episodes_seen=1130 last_return=-151.7 (+1 eps) [worker 2] episodes_seen=1120 last_return=-112.0 (+1 eps) [worker 0] episodes_seen=1130 last_return=-81.8 (+1 eps) [A2C][sync] it= 3505 steps= 420600 (+120) avg10=-120.28 loss=451.651 pg=0.027 vf=694.814 H=1.064 gn=1519.010 [worker 1] episodes_seen=1130 last_return=-241.9 (+1 eps) [worker 2] episodes_seen=1130 last_return=-84.8 (+1 eps) [worker 3] episodes_seen=1140 last_return=-117.1 (+1 eps) [worker 0] episodes_seen=1140 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1140 last_return=-98.8 (+1 eps) [worker 2] episodes_seen=1140 last_return=-89.8 (+1 eps) [worker 3] episodes_seen=1150 last_return=-70.4 (+1 eps) [worker 0] episodes_seen=1150 last_return=-170.6 (+1 eps) [worker 1] episodes_seen=1150 last_return=-92.6 (+1 eps) [worker 2] episodes_seen=1150 last_return=-143.4 (+1 eps) [worker 3] episodes_seen=1160 last_return=-152.2 (+1 eps) [worker 0] episodes_seen=1160 last_return=-181.5 (+1 eps) [worker 1] episodes_seen=1160 last_return=-132.0 (+1 eps) [worker 3] episodes_seen=1170 last_return=-175.9 (+1 eps) [worker 2] episodes_seen=1160 last_return=-100.2 (+1 eps) [worker 0] episodes_seen=1170 last_return=-245.7 (+1 eps) [worker 1] episodes_seen=1170 last_return=-205.9 (+1 eps) [worker 3] episodes_seen=1180 last_return=-201.0 (+1 eps) [worker 2] episodes_seen=1170 last_return=-157.5 (+1 eps) [worker 0] episodes_seen=1180 last_return=-207.1 (+1 eps) [worker 1] episodes_seen=1180 last_return=-223.1 (+1 eps) [worker 3] episodes_seen=1190 last_return=-162.0 (+1 eps) [worker 2] episodes_seen=1180 last_return=-110.2 (+1 eps) [worker 0] episodes_seen=1190 last_return=-279.5 (+1 eps) [worker 1] episodes_seen=1190 last_return=-158.2 (+1 eps) [worker 3] episodes_seen=1200 
last_return=204.1 (+1 eps) [worker 2] episodes_seen=1190 last_return=-27.2 (+1 eps) [worker 0] episodes_seen=1200 last_return=-122.2 (+1 eps) [worker 1] episodes_seen=1200 last_return=-315.2 (+1 eps) [worker 3] episodes_seen=1210 last_return=-150.4 (+1 eps) [worker 0] episodes_seen=1210 last_return=-330.7 (+1 eps) [worker 2] episodes_seen=1200 last_return=-238.0 (+1 eps) [worker 1] episodes_seen=1210 last_return=-409.1 (+1 eps) [worker 3] episodes_seen=1220 last_return=-197.2 (+1 eps) [worker 2] episodes_seen=1210 last_return=-277.8 (+1 eps) [worker 0] episodes_seen=1220 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1220 last_return=-2.6 (+1 eps) [worker 3] episodes_seen=1230 last_return=-380.4 (+1 eps) [worker 2] episodes_seen=1220 last_return=-237.0 (+1 eps) [worker 0] episodes_seen=1230 last_return=-369.5 (+1 eps) [worker 1] episodes_seen=1230 last_return=-161.7 (+1 eps) [worker 3] episodes_seen=1240 last_return=-293.0 (+1 eps) [worker 2] episodes_seen=1230 last_return=-162.8 (+1 eps) [worker 0] episodes_seen=1240 last_return=-136.0 (+1 eps) [worker 1] episodes_seen=1240 last_return=-99.5 (+1 eps) [worker 3] episodes_seen=1250 last_return=-140.6 (+1 eps) [worker 2] episodes_seen=1240 last_return=-226.7 (+1 eps) [worker 0] episodes_seen=1250 last_return=-165.4 (+1 eps) [worker 1] episodes_seen=1250 last_return=-157.8 (+1 eps) [worker 3] episodes_seen=1260 last_return=-195.2 (+1 eps) [worker 2] episodes_seen=1250 last_return=-112.8 (+1 eps) [worker 0] episodes_seen=1260 last_return=-102.3 (+1 eps) [worker 1] episodes_seen=1260 last_return=-43.1 (+1 eps) [worker 3] episodes_seen=1270 last_return=-32.6 (+1 eps) [worker 2] episodes_seen=1260 last_return=-106.0 (+1 eps) [worker 0] episodes_seen=1270 last_return=-95.0 (+1 eps) [worker 1] episodes_seen=1270 last_return=-187.7 (+1 eps) [worker 2] episodes_seen=1270 last_return=-84.5 (+1 eps) [worker 3] episodes_seen=1280 last_return=-222.6 (+1 eps) [worker 0] episodes_seen=1280 last_return=-152.6 (+1 eps) [worker 
1] episodes_seen=1280 last_return=-163.6 (+1 eps) [worker 2] episodes_seen=1280 last_return=-174.1 (+1 eps) [worker 3] episodes_seen=1290 last_return=-239.2 (+1 eps) [worker 0] episodes_seen=1290 last_return=-157.8 (+1 eps) [worker 1] episodes_seen=1290 last_return=-105.9 (+1 eps) [worker 2] episodes_seen=1290 last_return=-124.8 (+1 eps) [worker 3] episodes_seen=1300 last_return=-93.6 (+1 eps) [worker 0] episodes_seen=1300 last_return=-67.8 (+1 eps) [worker 1] episodes_seen=1300 last_return=-105.5 (+1 eps) [worker 2] episodes_seen=1300 last_return=-80.7 (+1 eps) [worker 3] episodes_seen=1310 last_return=-142.9 (+1 eps) [worker 0] episodes_seen=1310 last_return=-93.9 (+1 eps) [worker 1] episodes_seen=1310 last_return=-285.2 (+1 eps) [A2C][sync] it= 4089 steps= 490680 (+120) avg10=-173.78 loss=717.454 pg=0.049 vf=1103.709 H=1.139 gn=1805.956 [worker 3] episodes_seen=1320 last_return=-114.3 (+1 eps) [worker 2] episodes_seen=1310 last_return=-71.4 (+1 eps) [worker 0] episodes_seen=1320 last_return=-97.4 (+1 eps) [worker 1] episodes_seen=1320 last_return=-146.1 (+1 eps) [worker 2] episodes_seen=1320 last_return=-72.5 (+1 eps) [worker 3] episodes_seen=1330 last_return=-82.1 (+1 eps) [worker 0] episodes_seen=1330 last_return=-112.1 (+1 eps) [worker 1] episodes_seen=1330 last_return=-200.2 (+1 eps) [worker 2] episodes_seen=1330 last_return=4.3 (+1 eps) [worker 3] episodes_seen=1340 last_return=-146.8 (+1 eps) [worker 0] episodes_seen=1340 last_return=-83.9 (+1 eps) [worker 1] episodes_seen=1340 last_return=-96.8 (+1 eps) [worker 3] episodes_seen=1350 last_return=-274.8 (+1 eps) [worker 2] episodes_seen=1340 last_return=-94.6 (+1 eps) [worker 0] episodes_seen=1350 last_return=-108.7 (+1 eps) [worker 1] episodes_seen=1350 last_return=-130.6 (+1 eps) [worker 3] episodes_seen=1360 last_return=-221.0 (+1 eps) [worker 2] episodes_seen=1350 last_return=-99.9 (+1 eps) [worker 0] episodes_seen=1360 last_return=-71.8 (+1 eps) [worker 1] episodes_seen=1360 last_return=-128.8 (+1 eps) 
[worker 2] episodes_seen=1360 last_return=27.8 (+1 eps) [worker 3] episodes_seen=1370 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1370 last_return=-66.2 (+1 eps) [worker 1] episodes_seen=1370 last_return=-90.5 (+1 eps) [worker 3] episodes_seen=1380 last_return=-97.2 (+1 eps) [worker 2] episodes_seen=1370 last_return=-114.0 (+1 eps) [worker 0] episodes_seen=1380 last_return=-88.6 (+1 eps) [worker 1] episodes_seen=1380 last_return=-165.4 (+1 eps) [worker 2] episodes_seen=1380 last_return=-108.4 (+1 eps) [worker 3] episodes_seen=1390 last_return=-88.3 (+1 eps) [worker 0] episodes_seen=1390 last_return=-74.8 (+1 eps) [worker 1] episodes_seen=1390 last_return=-11.1 (+1 eps) [worker 2] episodes_seen=1390 last_return=-81.1 (+1 eps) [worker 3] episodes_seen=1400 last_return=-86.5 (+1 eps) [worker 0] episodes_seen=1400 last_return=-130.7 (+1 eps) [worker 1] episodes_seen=1400 last_return=-148.7 (+1 eps) [worker 3] episodes_seen=1410 last_return=-81.9 (+1 eps) [worker 0] episodes_seen=1410 last_return=-123.5 (+1 eps) [worker 1] episodes_seen=1410 last_return=-102.4 (+1 eps) [worker 2] episodes_seen=1400 last_return=-0.7 (+1 eps) [worker 3] episodes_seen=1420 last_return=-74.8 (+1 eps) [worker 0] episodes_seen=1420 last_return=-95.3 (+1 eps) [worker 2] episodes_seen=1410 last_return=-68.3 (+1 eps) [worker 1] episodes_seen=1420 last_return=-134.1 (+1 eps) [worker 3] episodes_seen=1430 last_return=-135.2 (+1 eps) [worker 0] episodes_seen=1430 last_return=-118.2 (+1 eps) [worker 2] episodes_seen=1420 last_return=-116.9 (+1 eps) [worker 1] episodes_seen=1430 last_return=-87.6 (+1 eps) [worker 3] episodes_seen=1440 last_return=-84.6 (+1 eps) [worker 0] episodes_seen=1440 last_return=-113.1 (+1 eps) [worker 2] episodes_seen=1430 last_return=-64.6 (+1 eps) [worker 1] episodes_seen=1440 last_return=-131.6 (+1 eps) [worker 3] episodes_seen=1450 last_return=-100.4 (+1 eps) [worker 0] episodes_seen=1450 last_return=-92.7 (+1 eps) [worker 2] episodes_seen=1440 last_return=-214.8 
(+1 eps) [worker 1] episodes_seen=1450 last_return=-175.8 (+1 eps) [worker 3] episodes_seen=1460 last_return=-152.4 (+1 eps) [worker 0] episodes_seen=1460 last_return=-152.2 (+1 eps) [worker 2] episodes_seen=1450 last_return=-248.4 (+1 eps) [worker 1] episodes_seen=1460 last_return=-223.3 (+1 eps) [worker 0] episodes_seen=1470 last_return=-285.0 (+1 eps) [worker 3] episodes_seen=1470 last_return=-195.6 (+1 eps) [worker 2] episodes_seen=1460 last_return=-242.4 (+1 eps) [worker 1] episodes_seen=1470 last_return=-242.0 (+1 eps) [worker 0] episodes_seen=1480 last_return=-233.0 (+1 eps) [worker 3] episodes_seen=1480 last_return=-411.8 (+1 eps) [worker 2] episodes_seen=1470 last_return=-152.3 (+1 eps) [worker 1] episodes_seen=1480 last_return=-183.2 (+1 eps) [worker 3] episodes_seen=1490 last_return=-252.7 (+1 eps) [worker 0] episodes_seen=1490 last_return=-274.9 (+1 eps) [worker 2] episodes_seen=1480 last_return=-160.6 (+1 eps) [worker 1] episodes_seen=1490 last_return=-3.9 (+1 eps) [worker 3] episodes_seen=1500 last_return=-171.2 (+1 eps) [worker 0] episodes_seen=1500 last_return=-329.1 (+1 eps) [worker 2] episodes_seen=1490 last_return=-304.9 (+1 eps) [A2C][sync] it= 4673 steps= 560760 (+120) avg10=-212.90 loss=3014.486 pg=0.033 vf=4637.628 H=0.983 gn=2504.687 [worker 1] episodes_seen=1500 last_return=-209.9 (+1 eps) [worker 3] episodes_seen=1510 last_return=-221.1 (+1 eps) [worker 0] episodes_seen=1510 last_return=-335.0 (+1 eps) [worker 2] episodes_seen=1500 last_return=-189.8 (+1 eps) [worker 1] episodes_seen=1510 last_return=-80.0 (+1 eps) [worker 3] episodes_seen=1520 last_return=-68.1 (+1 eps) [worker 0] episodes_seen=1520 last_return=-101.0 (+1 eps) [worker 2] episodes_seen=1510 last_return=-205.0 (+1 eps) [worker 3] episodes_seen=1530 last_return=-59.6 (+1 eps) [worker 0] episodes_seen=1530 last_return=-167.3 (+1 eps) [worker 1] episodes_seen=1520 last_return=-126.5 (+1 eps) [worker 2] episodes_seen=1520 last_return=-84.3 (+1 eps) [worker 3] episodes_seen=1540 
last_return=-101.1 (+1 eps) [worker 0] episodes_seen=1540 last_return=-118.7 (+1 eps) [worker 1] episodes_seen=1530 last_return=-97.8 (+1 eps) [worker 2] episodes_seen=1530 last_return=-72.6 (+1 eps) [worker 0] episodes_seen=1550 last_return=-142.2 (+1 eps) [worker 3] episodes_seen=1550 last_return=-199.7 (+1 eps) [worker 1] episodes_seen=1540 last_return=-65.2 (+1 eps) [worker 2] episodes_seen=1540 last_return=-104.4 (+1 eps) [worker 0] episodes_seen=1560 last_return=-123.7 (+1 eps) [worker 3] episodes_seen=1560 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1550 last_return=-143.2 (+1 eps) [worker 2] episodes_seen=1550 last_return=-142.8 (+1 eps) [worker 0] episodes_seen=1570 last_return=-134.9 (+1 eps) [worker 3] episodes_seen=1570 last_return=-107.9 (+1 eps) [worker 1] episodes_seen=1560 last_return=-65.3 (+1 eps) [worker 2] episodes_seen=1560 last_return=-164.0 (+1 eps) [worker 0] episodes_seen=1580 last_return=-61.5 (+1 eps) [worker 3] episodes_seen=1580 last_return=-73.2 (+1 eps) [worker 1] episodes_seen=1570 last_return=-91.6 (+1 eps) [worker 2] episodes_seen=1570 last_return=-105.2 (+1 eps) [worker 0] episodes_seen=1590 last_return=-102.4 (+1 eps) [worker 3] episodes_seen=1590 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1580 last_return=-53.3 (+1 eps) [worker 2] episodes_seen=1580 last_return=-88.7 (+1 eps) [worker 0] episodes_seen=1600 last_return=-50.8 (+1 eps) [worker 3] episodes_seen=1600 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1590 last_return=-148.4 (+1 eps) [worker 2] episodes_seen=1590 last_return=-102.7 (+1 eps) [worker 0] episodes_seen=1610 last_return=-82.4 (+1 eps) [worker 1] episodes_seen=1600 last_return=-102.2 (+1 eps) [worker 2] episodes_seen=1600 last_return=-138.7 (+1 eps) [worker 3] episodes_seen=1610 last_return=-105.9 (+1 eps) [worker 1] episodes_seen=1610 last_return=-78.5 (+1 eps) [worker 0] episodes_seen=1620 last_return=-61.8 (+1 eps) [worker 2] episodes_seen=1610 last_return=-136.3 (+1 eps) [worker 3] 
episodes_seen=1620 last_return=-96.7 (+1 eps) [worker 1] episodes_seen=1620 last_return=-123.1 (+1 eps) [worker 0] episodes_seen=1630 last_return=-73.5 (+1 eps) [worker 2] episodes_seen=1620 last_return=-117.2 (+1 eps) [worker 3] episodes_seen=1630 last_return=-92.5 (+1 eps) [worker 1] episodes_seen=1630 last_return=-115.7 (+1 eps) [worker 0] episodes_seen=1640 last_return=-130.3 (+1 eps) [worker 3] episodes_seen=1640 last_return=-300.3 (+1 eps) [worker 2] episodes_seen=1630 last_return=-126.5 (+1 eps) [worker 1] episodes_seen=1640 last_return=-66.1 (+1 eps) [worker 0] episodes_seen=1650 last_return=-63.5 (+1 eps) [worker 3] episodes_seen=1650 last_return=-74.8 (+1 eps) [worker 2] episodes_seen=1640 last_return=-95.9 (+1 eps) [worker 1] episodes_seen=1650 last_return=-148.8 (+1 eps) [worker 0] episodes_seen=1660 last_return=-235.3 (+1 eps) [worker 3] episodes_seen=1660 last_return=-107.3 (+1 eps) [worker 2] episodes_seen=1650 last_return=-115.2 (+1 eps) [worker 1] episodes_seen=1660 last_return=-323.8 (+1 eps) [worker 0] episodes_seen=1670 last_return=-205.5 (+1 eps) [worker 2] episodes_seen=1660 last_return=-96.9 (+1 eps) [worker 3] episodes_seen=1670 last_return=-59.7 (+1 eps) [A2C][sync] it= 5257 steps= 630840 (+120) avg10=-111.08 loss=955.641 pg=-0.070 vf=1470.333 H=1.110 gn=1147.500 [worker 1] episodes_seen=1670 last_return=-92.7 (+1 eps) [worker 0] episodes_seen=1680 last_return=-96.5 (+1 eps) [worker 2] episodes_seen=1670 last_return=-73.3 (+1 eps) [worker 3] episodes_seen=1680 last_return=-75.7 (+1 eps) [worker 1] episodes_seen=1680 last_return=-110.1 (+1 eps) [worker 0] episodes_seen=1690 last_return=-100.4 (+1 eps) [worker 2] episodes_seen=1680 last_return=-79.1 (+1 eps) [worker 3] episodes_seen=1690 last_return=-105.2 (+1 eps) [worker 1] episodes_seen=1690 last_return=-80.2 (+1 eps) [worker 0] episodes_seen=1700 last_return=-100.2 (+1 eps) [worker 2] episodes_seen=1690 last_return=-47.1 (+1 eps) [worker 3] episodes_seen=1700 last_return=-118.0 (+1 eps) 
[worker 1] episodes_seen=1700 last_return=-118.0 (+1 eps) [worker 0] episodes_seen=1710 last_return=-129.6 (+1 eps) [worker 2] episodes_seen=1700 last_return=38.8 (+1 eps) [worker 3] episodes_seen=1710 last_return=-63.0 (+1 eps) [worker 1] episodes_seen=1710 last_return=-329.2 (+1 eps) [worker 0] episodes_seen=1720 last_return=-174.2 (+1 eps) [worker 2] episodes_seen=1710 last_return=-332.8 (+1 eps) [worker 3] episodes_seen=1720 last_return=-250.4 (+1 eps) [worker 0] episodes_seen=1730 last_return=-293.9 (+1 eps) [worker 1] episodes_seen=1720 last_return=-208.9 (+1 eps) [worker 2] episodes_seen=1720 last_return=-212.4 (+1 eps) [worker 3] episodes_seen=1730 last_return=-181.4 (+1 eps) [worker 0] episodes_seen=1740 last_return=-129.2 (+1 eps) [worker 2] episodes_seen=1730 last_return=-272.7 (+1 eps) [worker 1] episodes_seen=1730 last_return=-224.8 (+1 eps) [worker 3] episodes_seen=1740 last_return=-197.3 (+1 eps) [worker 0] episodes_seen=1750 last_return=-156.6 (+1 eps) [worker 2] episodes_seen=1740 last_return=-199.7 (+1 eps) [worker 1] episodes_seen=1740 last_return=-53.3 (+1 eps) [worker 3] episodes_seen=1750 last_return=-274.7 (+1 eps) [worker 0] episodes_seen=1760 last_return=-226.9 (+1 eps) [worker 1] episodes_seen=1750 last_return=-238.9 (+1 eps) [worker 2] episodes_seen=1750 last_return=-309.2 (+1 eps) [worker 3] episodes_seen=1760 last_return=-207.6 (+1 eps) [worker 1] episodes_seen=1760 last_return=-186.5 (+1 eps) [worker 0] episodes_seen=1770 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1760 last_return=-114.0 (+1 eps) [worker 3] episodes_seen=1770 last_return=-195.1 (+1 eps) [worker 0] episodes_seen=1780 last_return=-129.3 (+1 eps) [worker 1] episodes_seen=1770 last_return=-290.5 (+1 eps) [worker 3] episodes_seen=1780 last_return=-140.1 (+1 eps) [worker 2] episodes_seen=1770 last_return=-301.9 (+1 eps) [worker 0] episodes_seen=1790 last_return=-233.2 (+1 eps) [worker 1] episodes_seen=1780 last_return=-193.6 (+1 eps) [worker 2] episodes_seen=1780 
last_return=-129.1 (+1 eps) [worker 3] episodes_seen=1790 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1800 last_return=-162.9 (+1 eps) [worker 1] episodes_seen=1790 last_return=-65.1 (+1 eps) [worker 3] episodes_seen=1800 last_return=-51.6 (+1 eps) [worker 2] episodes_seen=1790 last_return=-108.6 (+1 eps) [worker 1] episodes_seen=1800 last_return=-87.5 (+1 eps) [worker 3] episodes_seen=1810 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1810 last_return=32.9 (+1 eps) [worker 2] episodes_seen=1800 last_return=-66.2 (+1 eps) [worker 3] episodes_seen=1820 last_return=-75.1 (+1 eps) [worker 0] episodes_seen=1820 last_return=-93.3 (+1 eps) [worker 1] episodes_seen=1810 last_return=-74.7 (+1 eps) [worker 2] episodes_seen=1810 last_return=-55.5 (+1 eps) [worker 0] episodes_seen=1830 last_return=-87.4 (+1 eps) [worker 3] episodes_seen=1830 last_return=-105.6 (+1 eps) [worker 2] episodes_seen=1820 last_return=-122.8 (+1 eps) [worker 1] episodes_seen=1820 last_return=-122.9 (+1 eps) [worker 0] episodes_seen=1840 last_return=-64.0 (+1 eps) [worker 2] episodes_seen=1830 last_return=-79.9 (+1 eps) [worker 3] episodes_seen=1840 last_return=-76.1 (+1 eps) [worker 1] episodes_seen=1830 last_return=-95.5 (+1 eps) [worker 2] episodes_seen=1840 last_return=-105.9 (+1 eps) [worker 3] episodes_seen=1850 last_return=-106.0 (+1 eps) [worker 0] episodes_seen=1850 last_return=-99.9 (+1 eps) [worker 1] episodes_seen=1840 last_return=-170.9 (+1 eps) [worker 2] episodes_seen=1850 last_return=-94.3 (+1 eps) [A2C][sync] it= 5841 steps= 700920 (+120) avg10= -89.06 loss=326.130 pg=0.016 vf=501.723 H=1.100 gn=449.474 [worker 3] episodes_seen=1860 last_return=-83.7 (+1 eps) [worker 0] episodes_seen=1860 last_return=-132.2 (+1 eps) [worker 1] episodes_seen=1850 last_return=-86.0 (+1 eps) [worker 2] episodes_seen=1860 last_return=-74.7 (+1 eps) [worker 3] episodes_seen=1870 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1870 last_return=-107.6 (+1 eps) [worker 1] 
episodes_seen=1860 last_return=-59.0 (+1 eps) [worker 0] episodes_seen=1880 last_return=-87.4 (+1 eps) [worker 3] episodes_seen=1880 last_return=-100.3 (+1 eps) [worker 2] episodes_seen=1870 last_return=-75.1 (+1 eps) [worker 1] episodes_seen=1870 last_return=-98.9 (+1 eps) [worker 0] episodes_seen=1890 last_return=-77.4 (+1 eps) [worker 2] episodes_seen=1880 last_return=-65.7 (+1 eps) [worker 1] episodes_seen=1880 last_return=-169.9 (+1 eps) [worker 3] episodes_seen=1890 last_return=70.7 (+1 eps) [worker 0] episodes_seen=1900 last_return=-155.3 (+1 eps) [worker 2] episodes_seen=1890 last_return=-129.7 (+1 eps) [worker 3] episodes_seen=1900 last_return=10.5 (+1 eps) [worker 1] episodes_seen=1890 last_return=-88.3 (+1 eps) [worker 0] episodes_seen=1910 last_return=-157.7 (+1 eps) [worker 3] episodes_seen=1910 last_return=-28.0 (+1 eps) [worker 2] episodes_seen=1900 last_return=-118.0 (+1 eps) [worker 1] episodes_seen=1900 last_return=-94.9 (+1 eps) [worker 0] episodes_seen=1920 last_return=-93.0 (+1 eps) [worker 2] episodes_seen=1910 last_return=-93.3 (+1 eps) [worker 3] episodes_seen=1920 last_return=-71.6 (+1 eps) [worker 1] episodes_seen=1910 last_return=-153.5 (+1 eps) [worker 0] episodes_seen=1930 last_return=-147.7 (+1 eps) [worker 3] episodes_seen=1930 last_return=-79.9 (+1 eps) [worker 2] episodes_seen=1920 last_return=-77.2 (+1 eps) [worker 1] episodes_seen=1920 last_return=-82.9 (+1 eps) [worker 0] episodes_seen=1940 last_return=-69.5 (+1 eps) [worker 3] episodes_seen=1940 last_return=-64.0 (+1 eps) [worker 1] episodes_seen=1930 last_return=-57.6 (+1 eps) [worker 0] episodes_seen=1950 last_return=-133.6 (+1 eps) [worker 3] episodes_seen=1950 last_return=-111.7 (+1 eps) [worker 2] episodes_seen=1930 last_return=0.1 (+1 eps) [worker 1] episodes_seen=1940 last_return=-102.6 (+1 eps) [worker 0] episodes_seen=1960 last_return=-74.0 (+1 eps) [worker 2] episodes_seen=1940 last_return=-103.7 (+1 eps) [worker 0] episodes_seen=1970 last_return=-104.9 (+1 eps) 
[worker 1] episodes_seen=1950 last_return=-96.8 (+1 eps) [worker 3] episodes_seen=1960 last_return=-162.3 (+1 eps) [worker 2] episodes_seen=1950 last_return=-84.9 (+1 eps) [worker 0] episodes_seen=1980 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1960 last_return=-102.5 (+1 eps) [worker 3] episodes_seen=1970 last_return=-143.4 (+1 eps) [worker 2] episodes_seen=1960 last_return=-73.5 (+1 eps) [worker 0] episodes_seen=1990 last_return=-94.3 (+1 eps) [worker 1] episodes_seen=1970 last_return=-108.6 (+1 eps) [worker 3] episodes_seen=1980 last_return=-81.5 (+1 eps) [worker 2] episodes_seen=1970 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2000 last_return=-94.3 (+1 eps) [worker 1] episodes_seen=1980 last_return=-73.8 (+1 eps) [worker 3] episodes_seen=1990 last_return=-178.0 (+1 eps) [worker 2] episodes_seen=1980 last_return=-106.7 (+1 eps) [A2C][sync] it= 6425 steps= 771000 (+120) avg10= -68.33 loss=820.454 pg=-0.017 vf=1262.272 H=1.073 gn=1390.967 [worker 0] episodes_seen=2010 last_return=-74.0 (+1 eps) [worker 1] episodes_seen=1990 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=2000 last_return=-29.7 (+1 eps) [worker 2] episodes_seen=1990 last_return=-93.2 (+1 eps) [worker 0] episodes_seen=2020 last_return=-183.9 (+1 eps) [worker 3] episodes_seen=2010 last_return=-69.5 (+1 eps) [worker 1] episodes_seen=2000 last_return=-45.5 (+1 eps) [worker 2] episodes_seen=2000 last_return=-99.6 (+1 eps) [worker 3] episodes_seen=2020 last_return=-74.5 (+1 eps) [worker 1] episodes_seen=2010 last_return=-90.2 (+1 eps) [worker 2] episodes_seen=2010 last_return=-90.8 (+1 eps) [worker 0] episodes_seen=2030 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=2030 last_return=-154.0 (+1 eps) [worker 1] episodes_seen=2020 last_return=-158.9 (+1 eps) [worker 2] episodes_seen=2020 last_return=-141.2 (+1 eps) [worker 0] episodes_seen=2040 last_return=-222.7 (+1 eps) [worker 3] episodes_seen=2040 last_return=-108.3 (+1 eps) [worker 1] episodes_seen=2030 last_return=-114.4 
(+1 eps) [worker 2] episodes_seen=2030 last_return=-85.1 (+1 eps) [worker 0] episodes_seen=2050 last_return=-91.1 (+1 eps) [worker 3] episodes_seen=2050 last_return=-91.0 (+1 eps) [worker 0] episodes_seen=2060 last_return=-119.6 (+1 eps) [worker 2] episodes_seen=2040 last_return=-91.4 (+1 eps) [worker 3] episodes_seen=2060 last_return=-95.9 (+1 eps) [worker 1] episodes_seen=2040 last_return=-91.9 (+1 eps) [worker 2] episodes_seen=2050 last_return=-0.2 (+1 eps) [worker 0] episodes_seen=2070 last_return=-62.8 (+1 eps) [worker 1] episodes_seen=2050 last_return=-78.4 (+1 eps) [worker 3] episodes_seen=2070 last_return=-98.5 (+1 eps) [worker 1] episodes_seen=2060 last_return=-66.4 (+1 eps) [worker 0] episodes_seen=2080 last_return=-110.8 (+1 eps) [worker 3] episodes_seen=2080 last_return=-83.0 (+1 eps) [worker 2] episodes_seen=2060 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2070 last_return=-110.3 (+1 eps) [worker 0] episodes_seen=2090 last_return=-52.4 (+1 eps) [worker 3] episodes_seen=2090 last_return=-129.4 (+1 eps) [worker 2] episodes_seen=2070 last_return=-306.6 (+1 eps) [worker 0] episodes_seen=2100 last_return=-308.7 (+1 eps) [worker 1] episodes_seen=2080 last_return=-124.6 (+1 eps) [worker 3] episodes_seen=2100 last_return=-170.2 (+1 eps) [worker 2] episodes_seen=2080 last_return=-224.1 (+1 eps) [worker 0] episodes_seen=2110 last_return=-234.7 (+1 eps) [worker 1] episodes_seen=2090 last_return=-168.1 (+1 eps) [worker 3] episodes_seen=2110 last_return=-169.0 (+1 eps) [worker 2] episodes_seen=2090 last_return=-206.0 (+1 eps) [worker 0] episodes_seen=2120 last_return=-271.3 (+1 eps) [worker 1] episodes_seen=2100 last_return=-148.0 (+1 eps) [worker 3] episodes_seen=2120 last_return=-186.3 (+1 eps) [worker 2] episodes_seen=2100 last_return=-152.2 (+1 eps) [worker 0] episodes_seen=2130 last_return=-89.8 (+1 eps) [worker 1] episodes_seen=2110 last_return=-38.8 (+1 eps) [worker 3] episodes_seen=2130 last_return=-130.9 (+1 eps) [worker 0] episodes_seen=2140 
last_return=-65.8 (+1 eps) [worker 2] episodes_seen=2110 last_return=-35.1 (+1 eps) [worker 1] episodes_seen=2120 last_return=-123.6 (+1 eps) [worker 3] episodes_seen=2140 last_return=-76.5 (+1 eps) [worker 0] episodes_seen=2150 last_return=-99.3 (+1 eps) [worker 2] episodes_seen=2120 last_return=-80.1 (+1 eps) [worker 1] episodes_seen=2130 last_return=-81.2 (+1 eps) [worker 3] episodes_seen=2150 last_return=-87.2 (+1 eps) [A2C][sync] it= 7009 steps= 841080 (+120) avg10= -87.07 loss=193.316 pg=0.017 vf=297.391 H=1.074 gn=474.230 [worker 2] episodes_seen=2130 last_return=-57.3 (+1 eps) [worker 0] episodes_seen=2160 last_return=-98.9 (+1 eps) [worker 1] episodes_seen=2140 last_return=-106.5 (+1 eps) [worker 3] episodes_seen=2160 last_return=-263.7 (+1 eps) [worker 0] episodes_seen=2170 last_return=-112.0 (+1 eps) [worker 2] episodes_seen=2140 last_return=-93.2 (+1 eps) [worker 1] episodes_seen=2150 last_return=-110.7 (+1 eps) [worker 3] episodes_seen=2170 last_return=-107.5 (+1 eps) [worker 0] episodes_seen=2180 last_return=-26.5 (+1 eps) [worker 2] episodes_seen=2150 last_return=-71.5 (+1 eps) [worker 1] episodes_seen=2160 last_return=-108.6 (+1 eps) [worker 3] episodes_seen=2180 last_return=-176.5 (+1 eps) [worker 2] episodes_seen=2160 last_return=-73.7 (+1 eps) [worker 0] episodes_seen=2190 last_return=-99.9 (+1 eps) [worker 1] episodes_seen=2170 last_return=-107.1 (+1 eps) [worker 3] episodes_seen=2190 last_return=-43.3 (+1 eps) [worker 2] episodes_seen=2170 last_return=-38.4 (+1 eps) [worker 0] episodes_seen=2200 last_return=-56.5 (+1 eps) [worker 1] episodes_seen=2180 last_return=-68.7 (+1 eps) [worker 2] episodes_seen=2180 last_return=-97.2 (+1 eps) [worker 3] episodes_seen=2200 last_return=-91.5 (+1 eps) [worker 0] episodes_seen=2210 last_return=-73.8 (+1 eps) [worker 1] episodes_seen=2190 last_return=-84.7 (+1 eps) [worker 3] episodes_seen=2210 last_return=-53.3 (+1 eps) [worker 2] episodes_seen=2190 last_return=-136.0 (+1 eps) [worker 0] episodes_seen=2220 
last_return=-110.7 (+1 eps) [worker 1] episodes_seen=2200 last_return=-87.0 (+1 eps) [worker 3] episodes_seen=2220 last_return=-76.3 (+1 eps) [worker 0] episodes_seen=2230 last_return=-74.8 (+1 eps) [worker 2] episodes_seen=2200 last_return=-73.1 (+1 eps) [worker 0] episodes_seen=2240 last_return=-72.5 (+1 eps) [worker 3] episodes_seen=2230 last_return=-117.0 (+1 eps) [worker 1] episodes_seen=2210 last_return=-77.5 (+1 eps) [worker 2] episodes_seen=2210 last_return=-140.9 (+1 eps) [worker 0] episodes_seen=2250 last_return=-106.8 (+1 eps) [worker 3] episodes_seen=2240 last_return=-132.2 (+1 eps) [worker 1] episodes_seen=2220 last_return=-94.4 (+1 eps) [worker 2] episodes_seen=2220 last_return=-83.8 (+1 eps) [worker 0] episodes_seen=2260 last_return=-39.7 (+1 eps) [worker 2] episodes_seen=2230 last_return=-82.9 (+1 eps) [worker 3] episodes_seen=2250 last_return=-91.8 (+1 eps) [worker 1] episodes_seen=2230 last_return=-115.6 (+1 eps) [worker 0] episodes_seen=2270 last_return=-68.6 (+1 eps) [worker 3] episodes_seen=2260 last_return=-76.6 (+1 eps) [worker 1] episodes_seen=2240 last_return=-91.0 (+1 eps) [worker 2] episodes_seen=2240 last_return=-94.8 (+1 eps) [worker 0] episodes_seen=2280 last_return=-87.5 (+1 eps) [worker 3] episodes_seen=2270 last_return=-94.7 (+1 eps) [worker 1] episodes_seen=2250 last_return=-84.8 (+1 eps) [worker 2] episodes_seen=2250 last_return=-37.8 (+1 eps) [worker 0] episodes_seen=2290 last_return=-40.5 (+1 eps) [worker 1] episodes_seen=2260 last_return=-68.9 (+1 eps) [worker 3] episodes_seen=2280 last_return=-72.8 (+1 eps) [A2C][sync] it= 7593 steps= 911160 (+120) avg10= -81.51 loss=71.726 pg=-0.073 vf=110.467 H=1.009 gn=292.477 [worker 2] episodes_seen=2260 last_return=-94.7 (+1 eps) [worker 0] episodes_seen=2300 last_return=-69.1 (+1 eps) [worker 1] episodes_seen=2270 last_return=56.7 (+1 eps) [worker 3] episodes_seen=2290 last_return=-117.0 (+1 eps) [worker 2] episodes_seen=2270 last_return=-77.0 (+1 eps) [worker 0] episodes_seen=2310 
last_return=-100.5 (+1 eps) [worker 3] episodes_seen=2300 last_return=-92.7 (+1 eps) [worker 1] episodes_seen=2280 last_return=-81.0 (+1 eps) [worker 2] episodes_seen=2280 last_return=-55.6 (+1 eps) [worker 0] episodes_seen=2320 last_return=-100.0 (+1 eps) [worker 3] episodes_seen=2310 last_return=-117.2 (+1 eps) [worker 1] episodes_seen=2290 last_return=-106.6 (+1 eps) [worker 2] episodes_seen=2290 last_return=-48.1 (+1 eps) [worker 0] episodes_seen=2330 last_return=-7.4 (+1 eps) [worker 3] episodes_seen=2320 last_return=-68.7 (+1 eps) [worker 1] episodes_seen=2300 last_return=-138.7 (+1 eps) [worker 2] episodes_seen=2300 last_return=-81.1 (+1 eps) [worker 0] episodes_seen=2340 last_return=-79.1 (+1 eps) [worker 3] episodes_seen=2330 last_return=-99.4 (+1 eps) [worker 1] episodes_seen=2310 last_return=-114.8 (+1 eps) [worker 0] episodes_seen=2350 last_return=-108.3 (+1 eps) [worker 2] episodes_seen=2310 last_return=-86.3 (+1 eps) [worker 3] episodes_seen=2340 last_return=-67.4 (+1 eps) [worker 1] episodes_seen=2320 last_return=-112.5 (+1 eps) [worker 0] episodes_seen=2360 last_return=-85.2 (+1 eps) [worker 3] episodes_seen=2350 last_return=-44.7 (+1 eps) [worker 1] episodes_seen=2330 last_return=-76.8 (+1 eps) [worker 2] episodes_seen=2320 last_return=-60.5 (+1 eps) [worker 0] episodes_seen=2370 last_return=-60.2 (+1 eps) [worker 3] episodes_seen=2360 last_return=-75.3 (+1 eps) [worker 1] episodes_seen=2340 last_return=-108.7 (+1 eps) [worker 2] episodes_seen=2330 last_return=-78.2 (+1 eps) [worker 3] episodes_seen=2370 last_return=-44.9 (+1 eps) [worker 0] episodes_seen=2380 last_return=-93.4 (+1 eps) [worker 1] episodes_seen=2350 last_return=-82.9 (+1 eps) [worker 2] episodes_seen=2340 last_return=-52.6 (+1 eps) [worker 0] episodes_seen=2390 last_return=-94.3 (+1 eps) [worker 1] episodes_seen=2360 last_return=-104.9 (+1 eps) [worker 2] episodes_seen=2350 last_return=-72.8 (+1 eps) [worker 3] episodes_seen=2380 last_return=-110.6 (+1 eps) [worker 0] 
episodes_seen=2400 last_return=-79.1 (+1 eps) [worker 1] episodes_seen=2370 last_return=-264.7 (+1 eps) [worker 2] episodes_seen=2360 last_return=-77.3 (+1 eps) [worker 3] episodes_seen=2390 last_return=-87.0 (+1 eps) [worker 3] episodes_seen=2400 last_return=-143.9 (+1 eps) [worker 1] episodes_seen=2380 last_return=-127.9 (+1 eps) [worker 2] episodes_seen=2370 last_return=-219.4 (+1 eps) [worker 0] episodes_seen=2410 last_return=-70.2 (+1 eps) [worker 3] episodes_seen=2410 last_return=-115.2 (+1 eps) [A2C][sync] it= 8177 steps= 981240 (+120) avg10= -88.51 loss=147.468 pg=-0.056 vf=226.969 H=1.027 gn=449.715 [worker 1] episodes_seen=2390 last_return=-90.9 (+1 eps) [worker 0] episodes_seen=2420 last_return=-89.4 (+1 eps) [worker 2] episodes_seen=2380 last_return=-87.5 (+1 eps) [worker 3] episodes_seen=2420 last_return=-94.5 (+1 eps) [worker 2] episodes_seen=2390 last_return=-138.0 (+1 eps) [worker 1] episodes_seen=2400 last_return=-107.4 (+1 eps) [worker 0] episodes_seen=2430 last_return=-81.6 (+1 eps) [worker 3] episodes_seen=2430 last_return=-63.8 (+1 eps) [worker 2] episodes_seen=2400 last_return=-119.4 (+1 eps) [worker 1] episodes_seen=2410 last_return=-90.2 (+1 eps) [worker 0] episodes_seen=2440 last_return=-158.0 (+1 eps) [worker 3] episodes_seen=2440 last_return=-86.8 (+1 eps) [worker 2] episodes_seen=2410 last_return=-55.4 (+1 eps) [worker 1] episodes_seen=2420 last_return=-78.4 (+1 eps) [worker 0] episodes_seen=2450 last_return=-185.0 (+1 eps) [worker 3] episodes_seen=2450 last_return=-103.9 (+1 eps) [worker 2] episodes_seen=2420 last_return=-65.2 (+1 eps) [worker 1] episodes_seen=2430 last_return=-41.8 (+1 eps) [worker 2] episodes_seen=2430 last_return=-63.6 (+1 eps) [worker 1] episodes_seen=2440 last_return=-34.4 (+1 eps) [worker 3] episodes_seen=2460 last_return=-120.4 (+1 eps) [worker 0] episodes_seen=2460 last_return=-112.7 (+1 eps) [worker 2] episodes_seen=2440 last_return=-141.9 (+1 eps) [worker 1] episodes_seen=2450 last_return=-264.2 (+1 eps) 
[worker 0] episodes_seen=2470 last_return=-279.7 (+1 eps) [worker 3] episodes_seen=2470 last_return=-233.9 (+1 eps) [worker 2] episodes_seen=2450 last_return=-105.8 (+1 eps) [worker 1] episodes_seen=2460 last_return=-157.4 (+1 eps) [worker 0] episodes_seen=2480 last_return=-128.5 (+1 eps) [worker 3] episodes_seen=2480 last_return=-289.4 (+1 eps) [worker 2] episodes_seen=2460 last_return=-167.3 (+1 eps) [worker 1] episodes_seen=2470 last_return=-200.3 (+1 eps) [worker 0] episodes_seen=2490 last_return=-347.3 (+1 eps) [worker 3] episodes_seen=2490 last_return=-179.5 (+1 eps) [worker 2] episodes_seen=2470 last_return=-382.9 (+1 eps) [worker 1] episodes_seen=2480 last_return=-211.5 (+1 eps) [worker 0] episodes_seen=2500 last_return=-196.6 (+1 eps) [worker 3] episodes_seen=2500 last_return=-315.1 (+1 eps) [worker 2] episodes_seen=2480 last_return=-323.4 (+1 eps) [worker 1] episodes_seen=2490 last_return=-294.9 (+1 eps) [worker 0] episodes_seen=2510 last_return=-195.3 (+1 eps) [worker 3] episodes_seen=2510 last_return=-308.5 (+1 eps) [worker 1] episodes_seen=2500 last_return=-219.9 (+1 eps) [worker 2] episodes_seen=2490 last_return=-120.5 (+1 eps) [worker 0] episodes_seen=2520 last_return=-185.2 (+1 eps) [worker 3] episodes_seen=2520 last_return=-345.1 (+1 eps) [worker 1] episodes_seen=2510 last_return=-194.7 (+1 eps) [worker 2] episodes_seen=2500 last_return=-336.4 (+1 eps) [worker 0] episodes_seen=2530 last_return=-368.2 (+1 eps) [worker 3] episodes_seen=2530 last_return=-194.7 (+1 eps) [worker 2] episodes_seen=2510 last_return=-173.3 (+1 eps) [worker 1] episodes_seen=2520 last_return=-223.6 (+1 eps) [A2C][sync] it= 8761 steps= 1051320 (+120) avg10=-218.57 loss=26085.123 pg=-0.039 vf=40131.020 H=0.054 gn=26146.254 [worker 0] episodes_seen=2540 last_return=-174.7 (+1 eps) [worker 3] episodes_seen=2540 last_return=-285.2 (+1 eps) [worker 2] episodes_seen=2520 last_return=-175.2 (+1 eps) [worker 1] episodes_seen=2530 last_return=-264.7 (+1 eps) [worker 0] 
episodes_seen=2550 last_return=-217.1 (+1 eps) [worker 3] episodes_seen=2550 last_return=-195.7 (+1 eps) [worker 2] episodes_seen=2530 last_return=-500.4 (+1 eps) [worker 1] episodes_seen=2540 last_return=-110.4 (+1 eps) [worker 3] episodes_seen=2560 last_return=-129.3 (+1 eps) [worker 0] episodes_seen=2560 last_return=-129.9 (+1 eps) [worker 1] episodes_seen=2550 last_return=-258.8 (+1 eps) [worker 2] episodes_seen=2540 last_return=-229.3 (+1 eps) [worker 3] episodes_seen=2570 last_return=-173.3 (+1 eps) [worker 0] episodes_seen=2570 last_return=-313.5 (+1 eps) [worker 2] episodes_seen=2550 last_return=-149.6 (+1 eps) [worker 1] episodes_seen=2560 last_return=-261.9 (+1 eps) [worker 3] episodes_seen=2580 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2580 last_return=-322.1 (+1 eps) [worker 2] episodes_seen=2560 last_return=-238.1 (+1 eps) [worker 1] episodes_seen=2570 last_return=-123.8 (+1 eps) [worker 3] episodes_seen=2590 last_return=-317.3 (+1 eps) [worker 0] episodes_seen=2590 last_return=-255.9 (+1 eps) [worker 2] episodes_seen=2570 last_return=-143.2 (+1 eps) [worker 1] episodes_seen=2580 last_return=-254.4 (+1 eps) [worker 3] episodes_seen=2600 last_return=-286.5 (+1 eps) [worker 0] episodes_seen=2600 last_return=-217.5 (+1 eps) [worker 1] episodes_seen=2590 last_return=-295.7 (+1 eps) [worker 2] episodes_seen=2580 last_return=-218.4 (+1 eps) [worker 3] episodes_seen=2610 last_return=-167.5 (+1 eps) [worker 0] episodes_seen=2610 last_return=-311.5 (+1 eps) [worker 1] episodes_seen=2600 last_return=-345.1 (+1 eps) [worker 2] episodes_seen=2590 last_return=-330.6 (+1 eps) [worker 3] episodes_seen=2620 last_return=-160.1 (+1 eps) [worker 0] episodes_seen=2620 last_return=-271.3 (+1 eps) [worker 1] episodes_seen=2610 last_return=-318.0 (+1 eps) [worker 2] episodes_seen=2600 last_return=-384.8 (+1 eps) [worker 3] episodes_seen=2630 last_return=-183.8 (+1 eps) [worker 0] episodes_seen=2630 last_return=-276.1 (+1 eps) [worker 1] episodes_seen=2620 
last_return=-125.8 (+1 eps) [worker 3] episodes_seen=2640 last_return=-227.6 (+1 eps) [worker 0] episodes_seen=2640 last_return=-107.5 (+1 eps) [worker 2] episodes_seen=2610 last_return=-111.3 (+1 eps) [worker 3] episodes_seen=2650 last_return=-279.6 (+1 eps) [worker 1] episodes_seen=2630 last_return=-172.4 (+1 eps) [worker 0] episodes_seen=2650 last_return=-132.3 (+1 eps) [worker 2] episodes_seen=2620 last_return=-122.7 (+1 eps) [worker 3] episodes_seen=2660 last_return=-211.3 (+1 eps) [worker 1] episodes_seen=2640 last_return=-158.8 (+1 eps) [worker 0] episodes_seen=2660 last_return=-319.4 (+1 eps) [worker 3] episodes_seen=2670 last_return=-114.5 (+1 eps) [worker 1] episodes_seen=2650 last_return=-49.8 (+1 eps) [worker 0] episodes_seen=2670 last_return=-210.2 (+1 eps) [worker 2] episodes_seen=2630 last_return=-79.2 (+1 eps) [A2C][sync] it= 9345 steps= 1121400 (+120) avg10= -78.23 loss=496.681 pg=-0.013 vf=764.153 H=1.083 gn=2640.816 [worker 1] episodes_seen=2660 last_return=-73.2 (+1 eps) [worker 0] episodes_seen=2680 last_return=-71.0 (+1 eps) [worker 2] episodes_seen=2640 last_return=-90.1 (+1 eps) [worker 1] episodes_seen=2670 last_return=-88.3 (+1 eps) [worker 3] episodes_seen=2680 last_return=-0.1 (+1 eps) [worker 0] episodes_seen=2690 last_return=-78.6 (+1 eps) [worker 2] episodes_seen=2650 last_return=-87.6 (+1 eps) [worker 1] episodes_seen=2680 last_return=-47.0 (+1 eps) [worker 3] episodes_seen=2690 last_return=-50.0 (+1 eps) [worker 0] episodes_seen=2700 last_return=-79.7 (+1 eps) [worker 2] episodes_seen=2660 last_return=-91.4 (+1 eps) [worker 3] episodes_seen=2700 last_return=-75.1 (+1 eps) [worker 1] episodes_seen=2690 last_return=-271.4 (+1 eps) [worker 0] episodes_seen=2710 last_return=-76.1 (+1 eps) [worker 2] episodes_seen=2670 last_return=-64.7 (+1 eps) [worker 3] episodes_seen=2710 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2700 last_return=-69.6 (+1 eps) [worker 0] episodes_seen=2720 last_return=-95.5 (+1 eps) [worker 2] 
episodes_seen=2680 last_return=-79.2 (+1 eps) [worker 3] episodes_seen=2720 last_return=-123.3 (+1 eps) [worker 1] episodes_seen=2710 last_return=-73.5 (+1 eps) [worker 0] episodes_seen=2730 last_return=-75.1 (+1 eps) [worker 2] episodes_seen=2690 last_return=-82.9 (+1 eps) [worker 3] episodes_seen=2730 last_return=-93.2 (+1 eps) [worker 1] episodes_seen=2720 last_return=-116.6 (+1 eps) [worker 0] episodes_seen=2740 last_return=-68.6 (+1 eps) [worker 2] episodes_seen=2700 last_return=-36.7 (+1 eps) [worker 3] episodes_seen=2740 last_return=-83.9 (+1 eps) [worker 1] episodes_seen=2730 last_return=-97.0 (+1 eps) [worker 0] episodes_seen=2750 last_return=-23.5 (+1 eps) [worker 2] episodes_seen=2710 last_return=-104.8 (+1 eps) [worker 3] episodes_seen=2750 last_return=-75.5 (+1 eps) [worker 1] episodes_seen=2740 last_return=-80.6 (+1 eps) [worker 0] episodes_seen=2760 last_return=-106.2 (+1 eps) [worker 2] episodes_seen=2720 last_return=-125.3 (+1 eps) [worker 1] episodes_seen=2750 last_return=-27.9 (+1 eps) [worker 3] episodes_seen=2760 last_return=-61.1 (+1 eps) [worker 0] episodes_seen=2770 last_return=-94.7 (+1 eps) [worker 2] episodes_seen=2730 last_return=-45.3 (+1 eps) [worker 0] episodes_seen=2780 last_return=-119.2 (+1 eps) [worker 3] episodes_seen=2770 last_return=-84.5 (+1 eps) [worker 2] episodes_seen=2740 last_return=-95.2 (+1 eps) [worker 1] episodes_seen=2760 last_return=-32.5 (+1 eps) [worker 0] episodes_seen=2790 last_return=-91.7 (+1 eps) [worker 2] episodes_seen=2750 last_return=-99.8 (+1 eps) [worker 1] episodes_seen=2770 last_return=-58.6 (+1 eps) [worker 3] episodes_seen=2780 last_return=-89.6 (+1 eps) [worker 3] episodes_seen=2790 last_return=-65.0 (+1 eps) [worker 0] episodes_seen=2800 last_return=-64.6 (+1 eps) [worker 2] episodes_seen=2760 last_return=-82.3 (+1 eps) [worker 1] episodes_seen=2780 last_return=-82.6 (+1 eps) [worker 3] episodes_seen=2800 last_return=-102.9 (+1 eps) [worker 0] episodes_seen=2810 last_return=-104.5 (+1 eps) [worker 
1] episodes_seen=2790 last_return=-120.0 (+1 eps) [worker 2] episodes_seen=2770 last_return=-1.3 (+1 eps) [worker 0] episodes_seen=2820 last_return=-85.5 (+1 eps) [worker 1] episodes_seen=2800 last_return=-85.8 (+1 eps) [worker 3] episodes_seen=2810 last_return=-102.2 (+1 eps) [worker 2] episodes_seen=2780 last_return=-135.9 (+1 eps) [worker 1] episodes_seen=2810 last_return=-57.1 (+1 eps) [worker 0] episodes_seen=2830 last_return=-90.8 (+1 eps) [A2C][sync] it= 9929 steps= 1191480 (+120) avg10= -85.37 loss=88.017 pg=-0.020 vf=135.450 H=1.056 gn=452.221 [worker 3] episodes_seen=2820 last_return=-65.9 (+1 eps) [worker 2] episodes_seen=2790 last_return=-111.2 (+1 eps) [worker 1] episodes_seen=2820 last_return=-188.2 (+1 eps) [worker 0] episodes_seen=2840 last_return=-202.1 (+1 eps) [worker 3] episodes_seen=2830 last_return=-119.6 (+1 eps) [worker 1] episodes_seen=2830 last_return=-101.0 (+1 eps) [worker 0] episodes_seen=2850 last_return=-238.1 (+1 eps) [worker 2] episodes_seen=2800 last_return=-187.2 (+1 eps) [worker 3] episodes_seen=2840 last_return=-116.8 (+1 eps) [worker 0] episodes_seen=2860 last_return=-99.1 (+1 eps) [worker 2] episodes_seen=2810 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2840 last_return=-61.0 (+1 eps) [worker 3] episodes_seen=2850 last_return=-85.3 (+1 eps) [worker 0] episodes_seen=2870 last_return=-59.0 (+1 eps) [worker 2] episodes_seen=2820 last_return=-96.7 (+1 eps) [worker 3] episodes_seen=2860 last_return=-105.9 (+1 eps) [worker 0] episodes_seen=2880 last_return=-52.5 (+1 eps) [worker 1] episodes_seen=2850 last_return=-50.3 (+1 eps) [worker 2] episodes_seen=2830 last_return=-34.6 (+1 eps) [worker 3] episodes_seen=2870 last_return=-78.3 (+1 eps) [worker 2] episodes_seen=2840 last_return=-118.0 (+1 eps) [worker 0] episodes_seen=2890 last_return=-54.4 (+1 eps) [worker 1] episodes_seen=2860 last_return=-114.4 (+1 eps) [worker 3] episodes_seen=2880 last_return=-17.9 (+1 eps) [worker 2] episodes_seen=2850 last_return=-75.7 (+1 eps) 
[worker 0] episodes_seen=2900 last_return=-66.0 (+1 eps) [worker 1] episodes_seen=2870 last_return=-111.3 (+1 eps) [worker 3] episodes_seen=2890 last_return=-43.0 (+1 eps) [worker 1] episodes_seen=2880 last_return=-65.6 (+1 eps) [worker 0] episodes_seen=2910 last_return=-50.8 (+1 eps) [worker 2] episodes_seen=2860 last_return=-14.3 (+1 eps) [worker 3] episodes_seen=2900 last_return=-77.8 (+1 eps) [worker 1] episodes_seen=2890 last_return=-183.6 (+1 eps) [worker 0] episodes_seen=2920 last_return=-92.5 (+1 eps) [worker 2] episodes_seen=2870 last_return=-72.4 (+1 eps) [worker 1] episodes_seen=2900 last_return=-93.8 (+1 eps) [worker 3] episodes_seen=2910 last_return=-62.5 (+1 eps) [worker 0] episodes_seen=2930 last_return=-98.7 (+1 eps) [worker 2] episodes_seen=2880 last_return=-50.4 (+1 eps) [worker 1] episodes_seen=2910 last_return=-82.3 (+1 eps) [worker 3] episodes_seen=2920 last_return=-62.0 (+1 eps) [worker 0] episodes_seen=2940 last_return=-101.0 (+1 eps) [worker 2] episodes_seen=2890 last_return=-73.8 (+1 eps) [worker 1] episodes_seen=2920 last_return=-120.8 (+1 eps) [worker 3] episodes_seen=2930 last_return=-49.9 (+1 eps) [A2C][sync] it=10513 steps= 1261560 (+120) avg10= -87.46 loss=176.836 pg=-0.035 vf=272.118 H=1.032 gn=485.198 [worker 1] episodes_seen=2930 last_return=-81.9 (+1 eps) [worker 0] episodes_seen=2950 last_return=-47.1 (+1 eps) [worker 2] episodes_seen=2900 last_return=-90.9 (+1 eps) [worker 3] episodes_seen=2940 last_return=-12.7 (+1 eps) [worker 1] episodes_seen=2940 last_return=-88.3 (+1 eps) [worker 0] episodes_seen=2960 last_return=3.2 (+1 eps) [worker 3] episodes_seen=2950 last_return=-53.6 (+1 eps) [worker 2] episodes_seen=2910 last_return=-57.0 (+1 eps) [worker 1] episodes_seen=2950 last_return=-54.1 (+1 eps) [worker 0] episodes_seen=2970 last_return=-99.6 (+1 eps) [worker 3] episodes_seen=2960 last_return=-304.0 (+1 eps) [worker 2] episodes_seen=2920 last_return=-29.9 (+1 eps) [worker 1] episodes_seen=2960 last_return=-57.3 (+1 eps) 
[worker 0] episodes_seen=2980 last_return=-87.7 (+1 eps) [worker 3] episodes_seen=2970 last_return=-46.2 (+1 eps) [worker 0] episodes_seen=2990 last_return=-90.5 (+1 eps) [worker 1] episodes_seen=2970 last_return=-79.3 (+1 eps) [worker 2] episodes_seen=2930 last_return=-35.2 (+1 eps) [worker 3] episodes_seen=2980 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=3000 last_return=-142.2 (+1 eps) [worker 1] episodes_seen=2980 last_return=-89.6 (+1 eps) [worker 2] episodes_seen=2940 last_return=-96.9 (+1 eps) [worker 3] episodes_seen=2990 last_return=-85.6 (+1 eps) [worker 2] episodes_seen=2950 last_return=-65.2 (+1 eps) [worker 1] episodes_seen=2990 last_return=-93.7 (+1 eps) [worker 0] episodes_seen=3010 last_return=-37.2 (+1 eps) [worker 3] episodes_seen=3000 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=3000 last_return=-29.3 (+1 eps) [worker 0] episodes_seen=3020 last_return=-59.6 (+1 eps) [worker 2] episodes_seen=2960 last_return=-104.3 (+1 eps) [worker 1] episodes_seen=3010 last_return=-171.1 (+1 eps) [worker 3] episodes_seen=3010 last_return=-125.7 (+1 eps) [worker 2] episodes_seen=2970 last_return=-186.8 (+1 eps) [worker 1] episodes_seen=3020 last_return=-121.6 (+1 eps) [worker 0] episodes_seen=3030 last_return=-188.1 (+1 eps) [worker 3] episodes_seen=3020 last_return=-151.3 (+1 eps) [worker 2] episodes_seen=2980 last_return=-74.9 (+1 eps) [worker 0] episodes_seen=3040 last_return=-33.7 (+1 eps) [worker 1] episodes_seen=3030 last_return=-54.0 (+1 eps) [worker 3] episodes_seen=3030 last_return=-50.1 (+1 eps) [A2C][sync] it=11097 steps= 1331640 (+120) avg10= -95.11 loss=1777.548 pg=0.035 vf=2734.643 H=0.891 gn=5726.781 [worker 1] episodes_seen=3040 last_return=-82.1 (+1 eps) [worker 3] episodes_seen=3040 last_return=-104.8 (+1 eps) [worker 0] episodes_seen=3050 last_return=-70.4 (+1 eps) [worker 2] episodes_seen=2990 last_return=-91.4 (+1 eps) [worker 3] episodes_seen=3050 last_return=-55.6 (+1 eps) [worker 2] episodes_seen=3000 last_return=-84.5 (+1 
eps) [worker 1] episodes_seen=3050 last_return=-38.7 (+1 eps) [worker 0] episodes_seen=3060 last_return=-76.8 (+1 eps) [worker 3] episodes_seen=3060 last_return=-103.0 (+1 eps) [worker 1] episodes_seen=3060 last_return=-68.9 (+1 eps) [worker 2] episodes_seen=3010 last_return=-48.4 (+1 eps) [worker 0] episodes_seen=3070 last_return=-99.2 (+1 eps) [worker 3] episodes_seen=3070 last_return=-56.2 (+1 eps) [worker 0] episodes_seen=3080 last_return=-74.3 (+1 eps) [worker 1] episodes_seen=3070 last_return=-63.7 (+1 eps) [worker 2] episodes_seen=3020 last_return=-127.8 (+1 eps) [worker 3] episodes_seen=3080 last_return=-49.3 (+1 eps) [worker 0] episodes_seen=3090 last_return=-91.3 (+1 eps) [worker 2] episodes_seen=3030 last_return=-32.8 (+1 eps) [worker 1] episodes_seen=3080 last_return=-60.5 (+1 eps) [worker 3] episodes_seen=3090 last_return=-39.5 (+1 eps) [worker 1] episodes_seen=3090 last_return=-96.4 (+1 eps) [worker 0] episodes_seen=3100 last_return=-81.8 (+1 eps) [worker 2] episodes_seen=3040 last_return=-99.3 (+1 eps) [worker 3] episodes_seen=3100 last_return=-38.6 (+1 eps) [worker 1] episodes_seen=3100 last_return=-90.3 (+1 eps) [worker 3] episodes_seen=3110 last_return=-85.5 (+1 eps) [worker 2] episodes_seen=3050 last_return=-49.7 (+1 eps) [worker 0] episodes_seen=3110 last_return=-122.8 (+1 eps) [worker 1] episodes_seen=3110 last_return=-37.2 (+1 eps) [worker 2] episodes_seen=3060 last_return=-72.0 (+1 eps) [worker 3] episodes_seen=3120 last_return=-129.3 (+1 eps) [worker 0] episodes_seen=3120 last_return=-75.0 (+1 eps) [worker 2] episodes_seen=3070 last_return=-97.8 (+1 eps) [worker 3] episodes_seen=3130 last_return=-135.2 (+1 eps) [worker 1] episodes_seen=3120 last_return=-134.6 (+1 eps) [worker 0] episodes_seen=3130 last_return=-92.9 (+1 eps) [worker 3] episodes_seen=3140 last_return=-98.0 (+1 eps) [worker 2] episodes_seen=3080 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=3130 last_return=-84.8 (+1 eps) [worker 0] episodes_seen=3140 last_return=-102.5 
(+1 eps) [worker 3] episodes_seen=3150 last_return=-64.5 (+1 eps) [A2C][sync] it=11667 steps= 1400040 (+120) avg10= -61.07 loss=120.928 pg=-0.054 vf=186.135 H=1.065 gn=444.468 [Checkpoint] Saved self-describing checkpoint → part2_artifacts/checkpoints/a2c_run19_seed1227.pth [A2C][sync] done: steps=1400040 time=1299.0s avg10=-61.07
[Run run19_seed1227] checkpoint: part2_artifacts/checkpoints/a2c_run19_seed1227.pth [Run run19_seed1227] training plot (tail 500): part2_artifacts/train_curve_run19_seed1227.png [Run run19_seed1227] training plot (full): part2_artifacts/train_curve_full_run19_seed1227.png [Run run19_seed1227] per-worker plot: part2_artifacts/train_curve_workers_run19_seed1227.png [Run run19_seed1227] workers-average plot: part2_artifacts/train_curve_workers_avg_run19_seed1227.png
[Eval run19_seed1227] mean=-112.62 std=36.64 min=-196.49 max=-66.17 [Eval run19_seed1227] CSV: part2_artifacts/eval10_run19_seed1227.csv [Eval run19_seed1227] plot: part2_artifacts/eval10_run19_seed1227.png [Best] ep=7 return=-66.17 seed=1234
/usr/local/lib/python3.12/dist-packages/gymnasium/wrappers/rendering.py:293: UserWarning: WARN: Overwriting existing videos at /content/part2_artifacts/videos/run19_seed1227 folder (try specifying a different `video_folder` for the `RecordVideo` wrapper if this is not desired)
logger.warn(
[Video run19_seed1227] episode return=-66.17 [Video run19_seed1227] saved under: part2_artifacts/videos run19_seed1227 | mean=-112.6±36.6 | best_ep=7, best_ret=-66.2
Run#20
# Run #20: synchronous A2C with 3 workers and 25-step rollouts, 1.6M total env steps.
run_id = f"run20_seed{SEED}"

# Every tunable for this run lives in one mapping so runs are easy to diff.
# NOTE(review): the output recorded for this configuration reports H=0.000 and a
# very large value loss from the second log line onward — confirm whether this
# setting is worth keeping before reusing it.
run20_hparams = {
    "n_workers": 3,
    "total_env_steps": 1_600_000,
    "T": 25,
    "gamma": 0.99,
    "entropy_coef": 0.010,
    "value_coef": 0.60,
    "max_grad_norm": 0.5,
    "lr": 2.5e-4,
    "log_every": 80_000,
}

# Train; `paths.ckpt_path` is the checkpoint consumed by the two steps below.
model, logs, paths = train_once(run_id=run_id, **run20_hparams)

# Evaluate the saved checkpoint; only the metrics mapping ('mean', 'std') is used here.
metrics, _ = evaluate_10(run_id, paths.ckpt_path)

# Record a video from the same checkpoint and report the best episode index/return.
video_dir, best_idx, best_ret = record_best_from_eval(run_id, paths.ckpt_path)

# One-line summary of the run.
print(f"{run_id} | mean={metrics['mean']:.1f}±{metrics['std']:.1f} | best_ep={best_idx}, best_ret={best_ret:.1f}")
[Run run20_seed1227] starting training… [A2C][sync] start: workers=3, T=25, target_steps=1600000, mp=fork [A2C][sync] it= 1 steps= 75 (+ 75) avg10= nan loss=520.269 pg=-0.000 vf=867.138 H=1.386 gn=80.246 [worker 2] episodes_seen=10 last_return=-117.0 (+1 eps) [worker 0] episodes_seen=10 last_return=-135.3 (+1 eps) [worker 1] episodes_seen=10 last_return=-98.6 (+1 eps) [worker 0] episodes_seen=20 last_return=-85.7 (+1 eps) [worker 2] episodes_seen=20 last_return=-17.8 (+1 eps) [worker 1] episodes_seen=20 last_return=-97.7 (+1 eps) [worker 0] episodes_seen=30 last_return=-123.2 (+1 eps) [worker 2] episodes_seen=30 last_return=-153.0 (+1 eps) [worker 1] episodes_seen=30 last_return=-186.1 (+1 eps) [worker 0] episodes_seen=40 last_return=-245.9 (+1 eps) [worker 2] episodes_seen=40 last_return=-279.5 (+1 eps) [worker 1] episodes_seen=40 last_return=-147.9 (+1 eps) [worker 0] episodes_seen=50 last_return=-162.8 (+1 eps) [worker 2] episodes_seen=50 last_return=-115.6 (+1 eps) [worker 1] episodes_seen=50 last_return=-244.2 (+1 eps) [worker 2] episodes_seen=60 last_return=-288.6 (+1 eps) [worker 0] episodes_seen=60 last_return=-226.2 (+1 eps) [worker 1] episodes_seen=60 last_return=-212.6 (+1 eps) [worker 2] episodes_seen=70 last_return=-178.9 (+1 eps) [worker 0] episodes_seen=70 last_return=-177.3 (+1 eps) [worker 1] episodes_seen=70 last_return=-238.3 (+1 eps) [worker 2] episodes_seen=80 last_return=-110.1 (+1 eps) [worker 0] episodes_seen=80 last_return=-242.3 (+1 eps) [worker 2] episodes_seen=90 last_return=-197.7 (+1 eps) [worker 1] episodes_seen=80 last_return=-151.6 (+1 eps) [worker 0] episodes_seen=90 last_return=-178.2 (+1 eps) [worker 2] episodes_seen=100 last_return=-192.9 (+1 eps) [worker 1] episodes_seen=90 last_return=-272.2 (+1 eps) [worker 0] episodes_seen=100 last_return=-298.0 (+1 eps) [worker 1] episodes_seen=100 last_return=-110.0 (+1 eps) [worker 2] episodes_seen=110 last_return=-272.5 (+1 eps) [worker 0] episodes_seen=110 last_return=-312.6 (+1 eps) 
[worker 2] episodes_seen=120 last_return=-326.6 (+1 eps) [worker 0] episodes_seen=120 last_return=-219.1 (+1 eps) [worker 1] episodes_seen=110 last_return=-208.6 (+1 eps) [worker 0] episodes_seen=130 last_return=-125.9 (+1 eps) [worker 2] episodes_seen=130 last_return=-108.8 (+1 eps) [worker 1] episodes_seen=120 last_return=-353.0 (+1 eps) [worker 0] episodes_seen=140 last_return=-203.8 (+1 eps) [worker 1] episodes_seen=130 last_return=-320.6 (+1 eps) [worker 2] episodes_seen=140 last_return=-154.1 (+1 eps) [worker 0] episodes_seen=150 last_return=-192.6 (+1 eps) [worker 2] episodes_seen=150 last_return=-153.6 (+1 eps) [worker 1] episodes_seen=140 last_return=-289.7 (+1 eps) [worker 0] episodes_seen=160 last_return=-294.9 (+1 eps) [worker 2] episodes_seen=160 last_return=-169.0 (+1 eps) [worker 1] episodes_seen=150 last_return=-453.0 (+1 eps) [worker 0] episodes_seen=170 last_return=-239.6 (+1 eps) [worker 1] episodes_seen=160 last_return=-108.4 (+1 eps) [worker 2] episodes_seen=170 last_return=-223.5 (+1 eps) [worker 0] episodes_seen=180 last_return=-172.5 (+1 eps) [worker 1] episodes_seen=170 last_return=-133.5 (+1 eps) [worker 2] episodes_seen=180 last_return=-234.2 (+1 eps) [worker 0] episodes_seen=190 last_return=-156.8 (+1 eps) [worker 1] episodes_seen=180 last_return=-187.8 (+1 eps) [worker 2] episodes_seen=190 last_return=-236.5 (+1 eps) [worker 0] episodes_seen=200 last_return=-132.1 (+1 eps) [worker 1] episodes_seen=190 last_return=-234.2 (+1 eps) [worker 2] episodes_seen=200 last_return=-255.1 (+1 eps) [worker 1] episodes_seen=200 last_return=-138.6 (+1 eps) [worker 0] episodes_seen=210 last_return=-196.9 (+1 eps) [worker 2] episodes_seen=210 last_return=-190.2 (+1 eps) [worker 1] episodes_seen=210 last_return=-120.2 (+1 eps) [worker 2] episodes_seen=220 last_return=-108.2 (+1 eps) [worker 0] episodes_seen=220 last_return=-118.9 (+1 eps) [A2C][sync] it= 1068 steps= 80100 (+ 75) avg10=-211.07 loss=33043.641 pg=-0.000 vf=55072.734 H=0.000 gn=32754.955 
[worker 1] episodes_seen=220 last_return=-250.2 (+1 eps) [worker 2] episodes_seen=230 last_return=-194.2 (+1 eps) [worker 0] episodes_seen=230 last_return=-314.1 (+1 eps) [worker 1] episodes_seen=230 last_return=-255.8 (+1 eps) [worker 2] episodes_seen=240 last_return=-184.0 (+1 eps) [worker 0] episodes_seen=240 last_return=-113.9 (+1 eps) [worker 1] episodes_seen=240 last_return=-218.2 (+1 eps) [worker 0] episodes_seen=250 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=250 last_return=-358.4 (+1 eps) [worker 1] episodes_seen=250 last_return=-215.6 (+1 eps) [worker 0] episodes_seen=260 last_return=-153.8 (+1 eps) [worker 2] episodes_seen=260 last_return=-195.7 (+1 eps) [worker 1] episodes_seen=260 last_return=-228.2 (+1 eps) [worker 2] episodes_seen=270 last_return=-175.2 (+1 eps) [worker 0] episodes_seen=270 last_return=-257.6 (+1 eps) [worker 1] episodes_seen=270 last_return=-125.0 (+1 eps) [worker 2] episodes_seen=280 last_return=-250.8 (+1 eps) [worker 1] episodes_seen=280 last_return=-258.0 (+1 eps) [worker 0] episodes_seen=280 last_return=-230.9 (+1 eps) [worker 2] episodes_seen=290 last_return=-247.8 (+1 eps) [worker 1] episodes_seen=290 last_return=-180.1 (+1 eps) [worker 0] episodes_seen=290 last_return=-228.7 (+1 eps) [worker 2] episodes_seen=300 last_return=-228.4 (+1 eps) [worker 1] episodes_seen=300 last_return=-230.9 (+1 eps) [worker 0] episodes_seen=300 last_return=-150.7 (+1 eps) [worker 2] episodes_seen=310 last_return=-253.8 (+1 eps) [worker 1] episodes_seen=310 last_return=-189.8 (+1 eps) [worker 0] episodes_seen=310 last_return=-129.2 (+1 eps) [worker 2] episodes_seen=320 last_return=-221.7 (+1 eps) [worker 1] episodes_seen=320 last_return=-227.5 (+1 eps) [worker 0] episodes_seen=320 last_return=-118.0 (+1 eps) [worker 1] episodes_seen=330 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=330 last_return=-144.9 (+1 eps) [worker 0] episodes_seen=330 last_return=-110.7 (+1 eps) [worker 1] episodes_seen=340 last_return=-181.4 (+1 eps) 
[worker 2] episodes_seen=340 last_return=-258.0 (+1 eps) [worker 0] episodes_seen=340 last_return=-153.8 (+1 eps) [worker 1] episodes_seen=350 last_return=-226.7 (+1 eps) [worker 2] episodes_seen=350 last_return=-271.0 (+1 eps) [worker 0] episodes_seen=350 last_return=-142.4 (+1 eps) [worker 1] episodes_seen=360 last_return=-236.8 (+1 eps) [worker 2] episodes_seen=360 last_return=-264.3 (+1 eps) [worker 1] episodes_seen=370 last_return=-185.7 (+1 eps) [worker 0] episodes_seen=360 last_return=-156.9 (+1 eps) [worker 2] episodes_seen=370 last_return=-284.8 (+1 eps) [worker 1] episodes_seen=380 last_return=-215.5 (+1 eps) [worker 0] episodes_seen=370 last_return=-212.6 (+1 eps) [worker 2] episodes_seen=380 last_return=-320.7 (+1 eps) [worker 0] episodes_seen=380 last_return=-156.7 (+1 eps) [worker 1] episodes_seen=390 last_return=-268.0 (+1 eps) [worker 2] episodes_seen=390 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=390 last_return=-135.2 (+1 eps) [worker 1] episodes_seen=400 last_return=-159.2 (+1 eps) [worker 2] episodes_seen=400 last_return=-177.4 (+1 eps) [worker 0] episodes_seen=400 last_return=-159.5 (+1 eps) [worker 1] episodes_seen=410 last_return=-225.3 (+1 eps) [worker 2] episodes_seen=410 last_return=-170.4 (+1 eps) [worker 0] episodes_seen=410 last_return=-383.5 (+1 eps) [worker 1] episodes_seen=420 last_return=-216.4 (+1 eps) [worker 2] episodes_seen=420 last_return=-113.4 (+1 eps) [worker 0] episodes_seen=420 last_return=-235.1 (+1 eps) [worker 1] episodes_seen=430 last_return=-268.0 (+1 eps) [worker 2] episodes_seen=430 last_return=-233.5 (+1 eps) [worker 0] episodes_seen=430 last_return=-108.4 (+1 eps) [worker 1] episodes_seen=440 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=440 last_return=-303.2 (+1 eps) [A2C][sync] it= 2135 steps= 160125 (+ 75) avg10=-220.56 loss=55193.691 pg=-0.000 vf=91989.484 H=0.000 gn=34941.211 [worker 0] episodes_seen=440 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=450 last_return=-158.9 (+1 eps) 
[worker 2] episodes_seen=450 last_return=-216.9 (+1 eps) [worker 0] episodes_seen=450 last_return=-328.7 (+1 eps) [worker 1] episodes_seen=460 last_return=-239.6 (+1 eps) [worker 2] episodes_seen=460 last_return=-162.3 (+1 eps) [worker 0] episodes_seen=460 last_return=-129.0 (+1 eps) [worker 1] episodes_seen=470 last_return=-167.4 (+1 eps) [worker 2] episodes_seen=470 last_return=-231.9 (+1 eps) [worker 0] episodes_seen=470 last_return=-296.1 (+1 eps) [worker 1] episodes_seen=480 last_return=-146.9 (+1 eps) [worker 2] episodes_seen=480 last_return=-128.2 (+1 eps) [worker 0] episodes_seen=480 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=490 last_return=-244.9 (+1 eps) [worker 2] episodes_seen=490 last_return=-143.0 (+1 eps) [worker 0] episodes_seen=490 last_return=-242.4 (+1 eps) [worker 1] episodes_seen=500 last_return=-240.7 (+1 eps) [worker 2] episodes_seen=500 last_return=-219.9 (+1 eps) [worker 0] episodes_seen=500 last_return=-107.9 (+1 eps) [worker 1] episodes_seen=510 last_return=-280.3 (+1 eps) [worker 2] episodes_seen=510 last_return=-188.9 (+1 eps) [worker 0] episodes_seen=510 last_return=-115.1 (+1 eps) [worker 1] episodes_seen=520 last_return=-269.0 (+1 eps) [worker 2] episodes_seen=520 last_return=-202.7 (+1 eps) [worker 0] episodes_seen=520 last_return=-244.5 (+1 eps) [worker 2] episodes_seen=530 last_return=-267.3 (+1 eps) [worker 1] episodes_seen=530 last_return=-138.4 (+1 eps) [worker 0] episodes_seen=530 last_return=-317.3 (+1 eps) [worker 2] episodes_seen=540 last_return=-233.7 (+1 eps) [worker 0] episodes_seen=540 last_return=-117.8 (+1 eps) [worker 1] episodes_seen=540 last_return=-259.0 (+1 eps) [worker 2] episodes_seen=550 last_return=-200.8 (+1 eps) [worker 0] episodes_seen=550 last_return=-268.3 (+1 eps) [worker 1] episodes_seen=550 last_return=-187.8 (+1 eps) [worker 2] episodes_seen=560 last_return=-134.3 (+1 eps) [worker 1] episodes_seen=560 last_return=-173.8 (+1 eps) [worker 0] episodes_seen=560 last_return=-100.0 (+1 eps) 
[worker 2] episodes_seen=570 last_return=-192.5 (+1 eps) [worker 0] episodes_seen=570 last_return=-238.8 (+1 eps) [worker 1] episodes_seen=570 last_return=-139.3 (+1 eps) [worker 0] episodes_seen=580 last_return=-232.8 (+1 eps) [worker 2] episodes_seen=580 last_return=-196.6 (+1 eps) [worker 1] episodes_seen=580 last_return=-191.1 (+1 eps) [worker 0] episodes_seen=590 last_return=-180.6 (+1 eps) [worker 1] episodes_seen=590 last_return=-223.2 (+1 eps) [worker 2] episodes_seen=590 last_return=-131.2 (+1 eps) [worker 0] episodes_seen=600 last_return=-210.2 (+1 eps) [worker 1] episodes_seen=600 last_return=-307.6 (+1 eps) [worker 2] episodes_seen=600 last_return=-343.5 (+1 eps) [worker 0] episodes_seen=610 last_return=-148.2 (+1 eps) [worker 1] episodes_seen=610 last_return=-237.4 (+1 eps) [worker 2] episodes_seen=610 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=620 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=620 last_return=-107.8 (+1 eps) [worker 2] episodes_seen=620 last_return=-178.4 (+1 eps) [worker 0] episodes_seen=630 last_return=-218.3 (+1 eps) [worker 1] episodes_seen=630 last_return=-239.3 (+1 eps) [worker 2] episodes_seen=630 last_return=-157.0 (+1 eps) [worker 1] episodes_seen=640 last_return=-214.9 (+1 eps) [worker 0] episodes_seen=640 last_return=-283.9 (+1 eps) [worker 2] episodes_seen=640 last_return=-289.4 (+1 eps) [worker 0] episodes_seen=650 last_return=-217.1 (+1 eps) [worker 1] episodes_seen=650 last_return=-159.8 (+1 eps) [worker 2] episodes_seen=650 last_return=-191.8 (+1 eps) [A2C][sync] it= 3202 steps= 240150 (+ 75) avg10=-208.17 loss=1390860.375 pg=-0.000 vf=2318100.500 H=0.000 gn=1301826.875 [worker 1] episodes_seen=660 last_return=-213.6 (+1 eps) [worker 0] episodes_seen=660 last_return=-220.5 (+1 eps) [worker 2] episodes_seen=660 last_return=-211.3 (+1 eps) [worker 0] episodes_seen=670 last_return=-295.9 (+1 eps) [worker 1] episodes_seen=670 last_return=-263.0 (+1 eps) [worker 2] episodes_seen=670 last_return=-117.4 (+1 
eps) [worker 0] episodes_seen=680 last_return=-233.3 (+1 eps) [worker 1] episodes_seen=680 last_return=-263.8 (+1 eps) [worker 2] episodes_seen=680 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=690 last_return=-169.8 (+1 eps) [worker 1] episodes_seen=690 last_return=-261.5 (+1 eps) [worker 2] episodes_seen=690 last_return=-198.8 (+1 eps) [worker 0] episodes_seen=700 last_return=-146.5 (+1 eps) [worker 1] episodes_seen=700 last_return=-230.8 (+1 eps) [worker 0] episodes_seen=710 last_return=-125.8 (+1 eps) [worker 2] episodes_seen=700 last_return=-218.8 (+1 eps) [worker 1] episodes_seen=710 last_return=-129.3 (+1 eps) [worker 0] episodes_seen=720 last_return=-234.7 (+1 eps) [worker 2] episodes_seen=710 last_return=-205.0 (+1 eps) [worker 1] episodes_seen=720 last_return=-276.7 (+1 eps) [worker 2] episodes_seen=720 last_return=-208.3 (+1 eps) [worker 0] episodes_seen=730 last_return=-162.5 (+1 eps) [worker 1] episodes_seen=730 last_return=-154.9 (+1 eps) [worker 2] episodes_seen=730 last_return=-185.0 (+1 eps) [worker 0] episodes_seen=740 last_return=-317.0 (+1 eps) [worker 1] episodes_seen=740 last_return=-117.7 (+1 eps) [worker 2] episodes_seen=740 last_return=-388.3 (+1 eps) [worker 0] episodes_seen=750 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=750 last_return=-181.0 (+1 eps) [worker 2] episodes_seen=750 last_return=-183.4 (+1 eps) [worker 0] episodes_seen=760 last_return=-204.7 (+1 eps) [worker 1] episodes_seen=760 last_return=-240.9 (+1 eps) [worker 2] episodes_seen=760 last_return=-212.4 (+1 eps) [worker 0] episodes_seen=770 last_return=-211.3 (+1 eps) [worker 1] episodes_seen=770 last_return=-167.4 (+1 eps) [worker 2] episodes_seen=770 last_return=-243.4 (+1 eps) [worker 0] episodes_seen=780 last_return=-235.9 (+1 eps) [worker 1] episodes_seen=780 last_return=-227.4 (+1 eps) [worker 2] episodes_seen=780 last_return=-228.6 (+1 eps) [worker 0] episodes_seen=790 last_return=-173.5 (+1 eps) [worker 1] episodes_seen=790 last_return=-151.8 (+1 eps) 
[worker 2] episodes_seen=790 last_return=-218.7 (+1 eps) [worker 0] episodes_seen=800 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=800 last_return=-234.5 (+1 eps) [worker 0] episodes_seen=810 last_return=-249.0 (+1 eps) [worker 2] episodes_seen=800 last_return=-294.8 (+1 eps) [worker 1] episodes_seen=810 last_return=-195.6 (+1 eps) [worker 0] episodes_seen=820 last_return=-222.5 (+1 eps) [worker 2] episodes_seen=810 last_return=-147.0 (+1 eps) [worker 1] episodes_seen=820 last_return=-223.5 (+1 eps) [worker 0] episodes_seen=830 last_return=-242.2 (+1 eps) [worker 2] episodes_seen=820 last_return=-388.4 (+1 eps) [worker 1] episodes_seen=830 last_return=-147.1 (+1 eps) [worker 0] episodes_seen=840 last_return=-418.3 (+1 eps) [worker 2] episodes_seen=830 last_return=-295.9 (+1 eps) [worker 1] episodes_seen=840 last_return=-314.4 (+1 eps) [worker 0] episodes_seen=850 last_return=-216.0 (+1 eps) [worker 2] episodes_seen=840 last_return=-150.8 (+1 eps) [worker 1] episodes_seen=850 last_return=-145.0 (+1 eps) [worker 0] episodes_seen=860 last_return=-175.3 (+1 eps) [worker 2] episodes_seen=850 last_return=-241.9 (+1 eps) [worker 1] episodes_seen=860 last_return=-255.1 (+1 eps) [worker 0] episodes_seen=870 last_return=-251.1 (+1 eps) [worker 2] episodes_seen=860 last_return=-181.4 (+1 eps) [worker 1] episodes_seen=870 last_return=-268.4 (+1 eps) [worker 0] episodes_seen=880 last_return=-158.2 (+1 eps) [A2C][sync] it= 4269 steps= 320175 (+ 75) avg10=-210.75 loss=152134.016 pg=-0.000 vf=253556.688 H=0.000 gn=131983.531 [worker 1] episodes_seen=880 last_return=-145.2 (+1 eps) [worker 2] episodes_seen=870 last_return=-133.8 (+1 eps) [worker 0] episodes_seen=890 last_return=-220.2 (+1 eps) [worker 2] episodes_seen=880 last_return=-244.6 (+1 eps) [worker 1] episodes_seen=890 last_return=-258.8 (+1 eps) [worker 0] episodes_seen=900 last_return=-212.5 (+1 eps) [worker 1] episodes_seen=900 last_return=-154.2 (+1 eps) [worker 2] episodes_seen=890 last_return=-272.2 (+1 eps) 
[worker 0] episodes_seen=910 last_return=-249.7 (+1 eps) [worker 1] episodes_seen=910 last_return=-152.2 (+1 eps) [worker 2] episodes_seen=900 last_return=-170.6 (+1 eps) [worker 0] episodes_seen=920 last_return=-198.7 (+1 eps) [worker 1] episodes_seen=920 last_return=-239.8 (+1 eps) [worker 2] episodes_seen=910 last_return=-308.2 (+1 eps) [worker 0] episodes_seen=930 last_return=-159.8 (+1 eps) [worker 1] episodes_seen=930 last_return=-118.5 (+1 eps) [worker 2] episodes_seen=920 last_return=-187.9 (+1 eps) [worker 1] episodes_seen=940 last_return=-162.7 (+1 eps) [worker 0] episodes_seen=940 last_return=-243.5 (+1 eps) [worker 2] episodes_seen=930 last_return=-267.2 (+1 eps) [worker 1] episodes_seen=950 last_return=-155.5 (+1 eps) [worker 0] episodes_seen=950 last_return=-324.6 (+1 eps) [worker 2] episodes_seen=940 last_return=-167.8 (+1 eps) [worker 0] episodes_seen=960 last_return=-162.6 (+1 eps) [worker 1] episodes_seen=960 last_return=-215.3 (+1 eps) [worker 2] episodes_seen=950 last_return=-258.2 (+1 eps) [worker 1] episodes_seen=970 last_return=-300.3 (+1 eps) [worker 0] episodes_seen=970 last_return=-239.9 (+1 eps) [worker 1] episodes_seen=980 last_return=-118.3 (+1 eps) [worker 2] episodes_seen=960 last_return=-304.9 (+1 eps) [worker 0] episodes_seen=980 last_return=-216.1 (+1 eps) [worker 1] episodes_seen=990 last_return=-169.7 (+1 eps) [worker 2] episodes_seen=970 last_return=-294.2 (+1 eps) [worker 0] episodes_seen=990 last_return=-121.9 (+1 eps) [worker 2] episodes_seen=980 last_return=-189.1 (+1 eps) [worker 1] episodes_seen=1000 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1000 last_return=-253.7 (+1 eps) [worker 2] episodes_seen=990 last_return=-263.7 (+1 eps) [worker 1] episodes_seen=1010 last_return=-168.6 (+1 eps) [worker 2] episodes_seen=1000 last_return=-220.7 (+1 eps) [worker 0] episodes_seen=1010 last_return=-215.9 (+1 eps) [worker 1] episodes_seen=1020 last_return=-217.6 (+1 eps) [worker 2] episodes_seen=1010 last_return=-108.7 (+1 
eps) [worker 0] episodes_seen=1020 last_return=-119.2 (+1 eps) [worker 1] episodes_seen=1030 last_return=-155.8 (+1 eps) [worker 2] episodes_seen=1020 last_return=-170.8 (+1 eps) [worker 0] episodes_seen=1030 last_return=-115.6 (+1 eps) [worker 1] episodes_seen=1040 last_return=-216.8 (+1 eps) [worker 2] episodes_seen=1030 last_return=-385.3 (+1 eps) [worker 0] episodes_seen=1040 last_return=-273.2 (+1 eps) [worker 1] episodes_seen=1050 last_return=-184.7 (+1 eps) [worker 2] episodes_seen=1040 last_return=-230.5 (+1 eps) [worker 0] episodes_seen=1050 last_return=-109.2 (+1 eps) [worker 1] episodes_seen=1060 last_return=-330.1 (+1 eps) [worker 0] episodes_seen=1060 last_return=-300.2 (+1 eps) [worker 2] episodes_seen=1050 last_return=-158.5 (+1 eps) [worker 1] episodes_seen=1070 last_return=-246.5 (+1 eps) [worker 0] episodes_seen=1070 last_return=-240.3 (+1 eps) [worker 2] episodes_seen=1060 last_return=-233.6 (+1 eps) [worker 1] episodes_seen=1080 last_return=-149.7 (+1 eps) [worker 2] episodes_seen=1070 last_return=-245.3 (+1 eps) [worker 0] episodes_seen=1080 last_return=-339.8 (+1 eps) [worker 1] episodes_seen=1090 last_return=-105.4 (+1 eps) [worker 0] episodes_seen=1090 last_return=-281.6 (+1 eps) [worker 2] episodes_seen=1080 last_return=-127.1 (+1 eps) [worker 1] episodes_seen=1100 last_return=-294.3 (+1 eps) [worker 0] episodes_seen=1100 last_return=-227.4 (+1 eps) [A2C][sync] it= 5336 steps= 400200 (+ 75) avg10=-193.21 loss=1878.598 pg=-0.000 vf=3130.997 H=0.000 gn=26319.230 [worker 2] episodes_seen=1090 last_return=-200.3 (+1 eps) [worker 1] episodes_seen=1110 last_return=-222.0 (+1 eps) [worker 0] episodes_seen=1110 last_return=-196.0 (+1 eps) [worker 2] episodes_seen=1100 last_return=-214.1 (+1 eps) [worker 0] episodes_seen=1120 last_return=-154.5 (+1 eps) [worker 1] episodes_seen=1120 last_return=-109.9 (+1 eps) [worker 2] episodes_seen=1110 last_return=-311.9 (+1 eps) [worker 0] episodes_seen=1130 last_return=-320.9 (+1 eps) [worker 1] 
episodes_seen=1130 last_return=-272.3 (+1 eps) [worker 2] episodes_seen=1120 last_return=-131.8 (+1 eps) [worker 0] episodes_seen=1140 last_return=-156.8 (+1 eps) [worker 1] episodes_seen=1140 last_return=-254.0 (+1 eps) [worker 2] episodes_seen=1130 last_return=-283.3 (+1 eps) [worker 1] episodes_seen=1150 last_return=-187.5 (+1 eps) [worker 0] episodes_seen=1150 last_return=-149.4 (+1 eps) [worker 2] episodes_seen=1140 last_return=-206.2 (+1 eps) [worker 0] episodes_seen=1160 last_return=-299.0 (+1 eps) [worker 1] episodes_seen=1160 last_return=-221.2 (+1 eps) [worker 2] episodes_seen=1150 last_return=-187.6 (+1 eps) [worker 0] episodes_seen=1170 last_return=-225.2 (+1 eps) [worker 1] episodes_seen=1170 last_return=-181.6 (+1 eps) [worker 2] episodes_seen=1160 last_return=-234.5 (+1 eps) [worker 1] episodes_seen=1180 last_return=-170.7 (+1 eps) [worker 0] episodes_seen=1180 last_return=-229.4 (+1 eps) [worker 2] episodes_seen=1170 last_return=-120.5 (+1 eps) [worker 1] episodes_seen=1190 last_return=-232.8 (+1 eps) [worker 0] episodes_seen=1190 last_return=-109.6 (+1 eps) [worker 2] episodes_seen=1180 last_return=-207.7 (+1 eps) [worker 1] episodes_seen=1200 last_return=-228.3 (+1 eps) [worker 0] episodes_seen=1200 last_return=-217.8 (+1 eps) [worker 2] episodes_seen=1190 last_return=-173.4 (+1 eps) [worker 1] episodes_seen=1210 last_return=-241.5 (+1 eps) [worker 0] episodes_seen=1210 last_return=-208.2 (+1 eps) [worker 1] episodes_seen=1220 last_return=-210.9 (+1 eps) [worker 2] episodes_seen=1200 last_return=-258.9 (+1 eps) [worker 0] episodes_seen=1220 last_return=-265.6 (+1 eps) [worker 2] episodes_seen=1210 last_return=-252.2 (+1 eps) [worker 0] episodes_seen=1230 last_return=-139.4 (+1 eps) [worker 1] episodes_seen=1230 last_return=-161.8 (+1 eps) [worker 2] episodes_seen=1220 last_return=-259.1 (+1 eps) [worker 0] episodes_seen=1240 last_return=-151.4 (+1 eps) [worker 1] episodes_seen=1240 last_return=-342.5 (+1 eps) [worker 2] episodes_seen=1230 
last_return=-265.4 (+1 eps) [worker 0] episodes_seen=1250 last_return=-120.2 (+1 eps) [worker 1] episodes_seen=1250 last_return=-195.7 (+1 eps) [worker 2] episodes_seen=1240 last_return=-303.4 (+1 eps) [worker 0] episodes_seen=1260 last_return=-314.3 (+1 eps) [worker 1] episodes_seen=1260 last_return=-234.9 (+1 eps) [worker 2] episodes_seen=1250 last_return=-109.9 (+1 eps) [worker 0] episodes_seen=1270 last_return=-176.5 (+1 eps) [worker 1] episodes_seen=1270 last_return=-306.2 (+1 eps) [worker 2] episodes_seen=1260 last_return=-165.5 (+1 eps) [worker 0] episodes_seen=1280 last_return=-173.0 (+1 eps) [worker 1] episodes_seen=1280 last_return=-125.9 (+1 eps) [worker 2] episodes_seen=1270 last_return=-134.1 (+1 eps) [worker 0] episodes_seen=1290 last_return=-124.9 (+1 eps) [worker 1] episodes_seen=1290 last_return=-260.5 (+1 eps) [worker 2] episodes_seen=1280 last_return=-155.3 (+1 eps) [worker 0] episodes_seen=1300 last_return=-145.9 (+1 eps) [worker 2] episodes_seen=1290 last_return=-267.0 (+1 eps) [worker 1] episodes_seen=1300 last_return=-121.0 (+1 eps) [worker 0] episodes_seen=1310 last_return=-176.2 (+1 eps) [worker 1] episodes_seen=1310 last_return=-115.5 (+1 eps) [worker 2] episodes_seen=1300 last_return=-226.2 (+1 eps) [worker 0] episodes_seen=1320 last_return=-117.2 (+1 eps) [A2C][sync] it= 6403 steps= 480225 (+ 75) avg10=-257.45 loss=1702.480 pg=-0.000 vf=2837.466 H=0.000 gn=8796.226 [worker 1] episodes_seen=1320 last_return=-245.9 (+1 eps) [worker 0] episodes_seen=1330 last_return=-293.7 (+1 eps) [worker 2] episodes_seen=1310 last_return=-116.9 (+1 eps) [worker 1] episodes_seen=1330 last_return=-240.4 (+1 eps) [worker 0] episodes_seen=1340 last_return=-309.3 (+1 eps) [worker 2] episodes_seen=1320 last_return=-296.4 (+1 eps) [worker 0] episodes_seen=1350 last_return=-205.9 (+1 eps) [worker 1] episodes_seen=1340 last_return=-127.5 (+1 eps) [worker 2] episodes_seen=1330 last_return=-246.6 (+1 eps) [worker 0] episodes_seen=1360 last_return=-111.7 (+1 eps) 
[worker 1] episodes_seen=1350 last_return=-202.8 (+1 eps) [worker 2] episodes_seen=1340 last_return=-181.3 (+1 eps) [worker 0] episodes_seen=1370 last_return=-123.8 (+1 eps) [worker 1] episodes_seen=1360 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1350 last_return=-267.1 (+1 eps) [worker 0] episodes_seen=1380 last_return=-266.8 (+1 eps) [worker 1] episodes_seen=1370 last_return=-170.3 (+1 eps) [worker 2] episodes_seen=1360 last_return=-170.4 (+1 eps) [worker 0] episodes_seen=1390 last_return=-290.8 (+1 eps) [worker 2] episodes_seen=1370 last_return=-293.6 (+1 eps) [worker 1] episodes_seen=1380 last_return=-148.2 (+1 eps) [worker 0] episodes_seen=1400 last_return=-257.0 (+1 eps) [worker 1] episodes_seen=1390 last_return=-215.1 (+1 eps) [worker 2] episodes_seen=1380 last_return=-149.4 (+1 eps) [worker 0] episodes_seen=1410 last_return=-118.5 (+1 eps) [worker 1] episodes_seen=1400 last_return=-111.7 (+1 eps) [worker 2] episodes_seen=1390 last_return=-243.9 (+1 eps) [worker 0] episodes_seen=1420 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1410 last_return=-170.4 (+1 eps) [worker 2] episodes_seen=1400 last_return=-329.7 (+1 eps) [worker 0] episodes_seen=1430 last_return=-173.4 (+1 eps) [worker 0] episodes_seen=1440 last_return=-184.2 (+1 eps) [worker 1] episodes_seen=1420 last_return=-283.7 (+1 eps) [worker 2] episodes_seen=1410 last_return=-108.1 (+1 eps) [worker 2] episodes_seen=1420 last_return=-212.6 (+1 eps) [worker 0] episodes_seen=1450 last_return=-301.7 (+1 eps) [worker 1] episodes_seen=1430 last_return=-194.8 (+1 eps) [worker 0] episodes_seen=1460 last_return=-239.7 (+1 eps) [worker 2] episodes_seen=1430 last_return=-150.7 (+1 eps) [worker 1] episodes_seen=1440 last_return=-291.4 (+1 eps) [worker 0] episodes_seen=1470 last_return=-142.7 (+1 eps) [worker 2] episodes_seen=1440 last_return=-128.2 (+1 eps) [worker 1] episodes_seen=1450 last_return=-237.8 (+1 eps) [worker 0] episodes_seen=1480 last_return=-145.4 (+1 eps) [worker 2] 
episodes_seen=1450 last_return=-175.9 (+1 eps) [worker 1] episodes_seen=1460 last_return=-141.2 (+1 eps) [worker 2] episodes_seen=1460 last_return=-130.5 (+1 eps) [worker 0] episodes_seen=1490 last_return=-142.7 (+1 eps) [worker 1] episodes_seen=1470 last_return=-221.1 (+1 eps) [worker 0] episodes_seen=1500 last_return=-108.7 (+1 eps) [worker 2] episodes_seen=1470 last_return=-262.4 (+1 eps) [worker 1] episodes_seen=1480 last_return=-131.4 (+1 eps) [worker 0] episodes_seen=1510 last_return=-164.9 (+1 eps) [worker 1] episodes_seen=1490 last_return=-209.5 (+1 eps) [worker 2] episodes_seen=1480 last_return=-252.7 (+1 eps) [worker 1] episodes_seen=1500 last_return=-311.1 (+1 eps) [worker 0] episodes_seen=1520 last_return=-145.7 (+1 eps) [worker 2] episodes_seen=1490 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1510 last_return=-125.2 (+1 eps) [worker 0] episodes_seen=1530 last_return=-425.5 (+1 eps) [worker 2] episodes_seen=1500 last_return=-258.3 (+1 eps) [worker 0] episodes_seen=1540 last_return=-114.7 (+1 eps) [worker 1] episodes_seen=1520 last_return=-152.6 (+1 eps) [worker 2] episodes_seen=1510 last_return=-219.5 (+1 eps) [worker 1] episodes_seen=1530 last_return=-284.0 (+1 eps) [worker 0] episodes_seen=1550 last_return=-190.3 (+1 eps) [A2C][sync] it= 7470 steps= 560250 (+ 75) avg10=-205.11 loss=2345.090 pg=-0.270 vf=3908.937 H=0.277 gn=8891.892 [worker 2] episodes_seen=1520 last_return=-174.2 (+1 eps) [worker 1] episodes_seen=1540 last_return=-142.4 (+1 eps) [worker 0] episodes_seen=1560 last_return=-300.1 (+1 eps) [worker 2] episodes_seen=1530 last_return=-242.4 (+1 eps) [worker 0] episodes_seen=1570 last_return=-216.3 (+1 eps) [worker 1] episodes_seen=1550 last_return=-181.2 (+1 eps) [worker 2] episodes_seen=1540 last_return=-154.9 (+1 eps) [worker 1] episodes_seen=1560 last_return=-167.7 (+1 eps) [worker 0] episodes_seen=1580 last_return=-176.7 (+1 eps) [worker 2] episodes_seen=1550 last_return=-227.6 (+1 eps) [worker 0] episodes_seen=1590 
last_return=-137.1 (+1 eps) [worker 1] episodes_seen=1570 last_return=-310.9 (+1 eps) [worker 2] episodes_seen=1560 last_return=-225.6 (+1 eps) [worker 0] episodes_seen=1600 last_return=-191.9 (+1 eps) [worker 1] episodes_seen=1580 last_return=-181.6 (+1 eps) [worker 2] episodes_seen=1570 last_return=-87.8 (+1 eps) [worker 0] episodes_seen=1610 last_return=-132.5 (+1 eps) [worker 1] episodes_seen=1590 last_return=-117.7 (+1 eps) [worker 2] episodes_seen=1580 last_return=-191.3 (+1 eps) [worker 1] episodes_seen=1600 last_return=-137.5 (+1 eps) [worker 0] episodes_seen=1620 last_return=-342.0 (+1 eps) [worker 2] episodes_seen=1590 last_return=-102.6 (+1 eps) [worker 1] episodes_seen=1610 last_return=-91.6 (+1 eps) [worker 0] episodes_seen=1630 last_return=-57.6 (+1 eps) [worker 2] episodes_seen=1600 last_return=-82.1 (+1 eps) [worker 1] episodes_seen=1620 last_return=-109.1 (+1 eps) [worker 0] episodes_seen=1640 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1610 last_return=-69.3 (+1 eps) [worker 1] episodes_seen=1630 last_return=-79.8 (+1 eps) [worker 0] episodes_seen=1650 last_return=-85.0 (+1 eps) [worker 2] episodes_seen=1620 last_return=-99.0 (+1 eps) [worker 1] episodes_seen=1640 last_return=-154.9 (+1 eps) [worker 0] episodes_seen=1660 last_return=-136.6 (+1 eps) [worker 1] episodes_seen=1650 last_return=-85.6 (+1 eps) [worker 2] episodes_seen=1630 last_return=-138.7 (+1 eps) [worker 0] episodes_seen=1670 last_return=-76.2 (+1 eps) [worker 2] episodes_seen=1640 last_return=-110.6 (+1 eps) [worker 1] episodes_seen=1660 last_return=-53.6 (+1 eps) [worker 0] episodes_seen=1680 last_return=-80.0 (+1 eps) [worker 1] episodes_seen=1670 last_return=-85.1 (+1 eps) [worker 2] episodes_seen=1650 last_return=-93.7 (+1 eps) [worker 0] episodes_seen=1690 last_return=-110.1 (+1 eps) [worker 2] episodes_seen=1660 last_return=-78.7 (+1 eps) [worker 1] episodes_seen=1680 last_return=-68.2 (+1 eps) [worker 0] episodes_seen=1700 last_return=-84.1 (+1 eps) [worker 2] 
episodes_seen=1670 last_return=-68.0 (+1 eps) [worker 1] episodes_seen=1690 last_return=-97.8 (+1 eps) [worker 0] episodes_seen=1710 last_return=-93.5 (+1 eps) [worker 2] episodes_seen=1680 last_return=-81.5 (+1 eps) [worker 1] episodes_seen=1700 last_return=-72.4 (+1 eps) [worker 0] episodes_seen=1720 last_return=-78.4 (+1 eps) [worker 2] episodes_seen=1690 last_return=-94.9 (+1 eps) [worker 1] episodes_seen=1710 last_return=-238.3 (+1 eps) [worker 0] episodes_seen=1730 last_return=-96.2 (+1 eps) [worker 2] episodes_seen=1700 last_return=-116.9 (+1 eps) [worker 1] episodes_seen=1720 last_return=-101.4 (+1 eps) [worker 0] episodes_seen=1740 last_return=-95.1 (+1 eps) [worker 2] episodes_seen=1710 last_return=-108.8 (+1 eps) [worker 1] episodes_seen=1730 last_return=-91.3 (+1 eps) [worker 0] episodes_seen=1750 last_return=-92.1 (+1 eps) [worker 2] episodes_seen=1720 last_return=-94.4 (+1 eps) [worker 1] episodes_seen=1740 last_return=-104.3 (+1 eps) [worker 0] episodes_seen=1760 last_return=-67.8 (+1 eps) [worker 2] episodes_seen=1730 last_return=-67.1 (+1 eps) [worker 1] episodes_seen=1750 last_return=-75.3 (+1 eps) [worker 0] episodes_seen=1770 last_return=-90.9 (+1 eps) [worker 2] episodes_seen=1740 last_return=-72.3 (+1 eps) [worker 1] episodes_seen=1760 last_return=-75.9 (+1 eps) [worker 0] episodes_seen=1780 last_return=-90.8 (+1 eps) [worker 2] episodes_seen=1750 last_return=-100.4 (+1 eps) [worker 1] episodes_seen=1770 last_return=-76.7 (+1 eps) [worker 0] episodes_seen=1790 last_return=-50.2 (+1 eps) [worker 2] episodes_seen=1760 last_return=-106.4 (+1 eps) [worker 1] episodes_seen=1780 last_return=-111.8 (+1 eps) [worker 2] episodes_seen=1770 last_return=-102.1 (+1 eps) [worker 0] episodes_seen=1800 last_return=-200.7 (+1 eps) [worker 1] episodes_seen=1790 last_return=-76.8 (+1 eps) [worker 2] episodes_seen=1780 last_return=-111.3 (+1 eps) [worker 0] episodes_seen=1810 last_return=-117.7 (+1 eps) [worker 1] episodes_seen=1800 last_return=-181.7 (+1 eps) 
[A2C][sync] it= 8537 steps= 640275 (+ 75) avg10=-135.76 loss=106.056 pg=-0.172 vf=177.055 H=0.560 gn=555.447 [worker 0] episodes_seen=1820 last_return=-84.3 (+1 eps) [worker 2] episodes_seen=1790 last_return=-185.9 (+1 eps) [worker 1] episodes_seen=1810 last_return=-181.1 (+1 eps) [worker 2] episodes_seen=1800 last_return=-239.1 (+1 eps) [worker 0] episodes_seen=1830 last_return=-147.9 (+1 eps) [worker 0] episodes_seen=1840 last_return=-141.0 (+1 eps) [worker 2] episodes_seen=1810 last_return=-111.9 (+1 eps) [worker 1] episodes_seen=1820 last_return=-237.7 (+1 eps) [worker 2] episodes_seen=1820 last_return=-93.9 (+1 eps) [worker 0] episodes_seen=1850 last_return=-83.5 (+1 eps) [worker 1] episodes_seen=1830 last_return=-156.0 (+1 eps) [worker 2] episodes_seen=1830 last_return=-100.5 (+1 eps) [worker 0] episodes_seen=1860 last_return=-42.9 (+1 eps) [worker 1] episodes_seen=1840 last_return=-35.9 (+1 eps) [worker 2] episodes_seen=1840 last_return=-55.9 (+1 eps) [worker 1] episodes_seen=1850 last_return=-156.7 (+1 eps) [worker 0] episodes_seen=1870 last_return=-53.6 (+1 eps) [worker 2] episodes_seen=1850 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=1860 last_return=-87.0 (+1 eps) [worker 0] episodes_seen=1880 last_return=-106.8 (+1 eps) [worker 2] episodes_seen=1860 last_return=-90.5 (+1 eps) [worker 0] episodes_seen=1890 last_return=-84.5 (+1 eps) [worker 1] episodes_seen=1870 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=1870 last_return=-139.2 (+1 eps) [worker 0] episodes_seen=1900 last_return=-111.3 (+1 eps) [worker 1] episodes_seen=1880 last_return=-264.0 (+1 eps) [worker 2] episodes_seen=1880 last_return=-48.2 (+1 eps) [worker 0] episodes_seen=1910 last_return=-107.2 (+1 eps) [worker 2] episodes_seen=1890 last_return=-167.7 (+1 eps) [worker 1] episodes_seen=1890 last_return=-158.6 (+1 eps) [worker 0] episodes_seen=1920 last_return=-103.3 (+1 eps) [worker 2] episodes_seen=1900 last_return=-199.9 (+1 eps) [worker 1] episodes_seen=1900 
last_return=-250.4 (+1 eps) [worker 0] episodes_seen=1930 last_return=-84.3 (+1 eps) [worker 2] episodes_seen=1910 last_return=-103.1 (+1 eps) [worker 1] episodes_seen=1910 last_return=-131.4 (+1 eps) [worker 0] episodes_seen=1940 last_return=-123.9 (+1 eps) [worker 2] episodes_seen=1920 last_return=-87.3 (+1 eps) [worker 1] episodes_seen=1920 last_return=-105.7 (+1 eps) [worker 0] episodes_seen=1950 last_return=-80.1 (+1 eps) [worker 2] episodes_seen=1930 last_return=-103.5 (+1 eps) [worker 1] episodes_seen=1930 last_return=-90.4 (+1 eps) [worker 0] episodes_seen=1960 last_return=-63.8 (+1 eps) [worker 2] episodes_seen=1940 last_return=-82.8 (+1 eps) [worker 1] episodes_seen=1940 last_return=-179.4 (+1 eps) [worker 2] episodes_seen=1950 last_return=-84.6 (+1 eps) [worker 0] episodes_seen=1970 last_return=-135.5 (+1 eps) [worker 1] episodes_seen=1950 last_return=-309.1 (+1 eps) [worker 2] episodes_seen=1960 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=1980 last_return=-129.0 (+1 eps) [worker 1] episodes_seen=1960 last_return=-100.7 (+1 eps) [worker 0] episodes_seen=1990 last_return=-97.5 (+1 eps) [worker 2] episodes_seen=1970 last_return=-65.9 (+1 eps) [worker 1] episodes_seen=1970 last_return=-79.9 (+1 eps) [worker 0] episodes_seen=2000 last_return=-293.0 (+1 eps) [worker 2] episodes_seen=1980 last_return=-234.6 (+1 eps) [worker 1] episodes_seen=1980 last_return=-241.8 (+1 eps) [worker 2] episodes_seen=1990 last_return=-245.1 (+1 eps) [worker 0] episodes_seen=2010 last_return=-136.5 (+1 eps) [worker 1] episodes_seen=1990 last_return=-265.2 (+1 eps) [worker 2] episodes_seen=2000 last_return=-146.2 (+1 eps) [worker 0] episodes_seen=2020 last_return=-250.3 (+1 eps) [worker 1] episodes_seen=2000 last_return=-233.4 (+1 eps) [worker 2] episodes_seen=2010 last_return=-246.7 (+1 eps) [worker 0] episodes_seen=2030 last_return=-166.3 (+1 eps) [worker 1] episodes_seen=2010 last_return=-238.9 (+1 eps) [worker 2] episodes_seen=2020 last_return=-217.6 (+1 eps) [worker 
0] episodes_seen=2040 last_return=-225.7 (+1 eps) [worker 1] episodes_seen=2020 last_return=-238.7 (+1 eps) [worker 2] episodes_seen=2030 last_return=-223.2 (+1 eps) [worker 0] episodes_seen=2050 last_return=-136.5 (+1 eps) [worker 1] episodes_seen=2030 last_return=-204.1 (+1 eps) [worker 2] episodes_seen=2040 last_return=-202.9 (+1 eps) [A2C][sync] it= 9604 steps= 720300 (+ 75) avg10=-187.75 loss=2533.066 pg=0.000 vf=4221.777 H=0.002 gn=30326.730 [worker 0] episodes_seen=2060 last_return=-209.2 (+1 eps) [worker 1] episodes_seen=2040 last_return=-271.6 (+1 eps) [worker 2] episodes_seen=2050 last_return=-345.8 (+1 eps) [worker 0] episodes_seen=2070 last_return=-200.7 (+1 eps) [worker 1] episodes_seen=2050 last_return=-190.4 (+1 eps) [worker 2] episodes_seen=2060 last_return=-127.5 (+1 eps) [worker 0] episodes_seen=2080 last_return=-224.2 (+1 eps) [worker 1] episodes_seen=2060 last_return=-155.7 (+1 eps) [worker 2] episodes_seen=2070 last_return=-138.8 (+1 eps) [worker 0] episodes_seen=2090 last_return=-240.9 (+1 eps) [worker 1] episodes_seen=2070 last_return=-136.1 (+1 eps) [worker 2] episodes_seen=2080 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2100 last_return=-228.1 (+1 eps) [worker 1] episodes_seen=2080 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=2090 last_return=-227.5 (+1 eps) [worker 0] episodes_seen=2110 last_return=-290.5 (+1 eps) [worker 1] episodes_seen=2090 last_return=-225.8 (+1 eps) [worker 2] episodes_seen=2100 last_return=-147.1 (+1 eps) [worker 0] episodes_seen=2120 last_return=-211.2 (+1 eps) [worker 1] episodes_seen=2100 last_return=-279.5 (+1 eps) [worker 2] episodes_seen=2110 last_return=-138.5 (+1 eps) [worker 0] episodes_seen=2130 last_return=-267.2 (+1 eps) [worker 1] episodes_seen=2110 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=2120 last_return=-141.4 (+1 eps) [worker 0] episodes_seen=2140 last_return=-227.3 (+1 eps) [worker 1] episodes_seen=2120 last_return=-356.9 (+1 eps) [worker 2] episodes_seen=2130 
last_return=-198.8 (+1 eps) [worker 0] episodes_seen=2150 last_return=-238.7 (+1 eps) [worker 1] episodes_seen=2130 last_return=-246.0 (+1 eps) [worker 0] episodes_seen=2160 last_return=-203.9 (+1 eps) [worker 2] episodes_seen=2140 last_return=-274.3 (+1 eps) [worker 1] episodes_seen=2140 last_return=-144.5 (+1 eps) [worker 0] episodes_seen=2170 last_return=-114.8 (+1 eps) [worker 2] episodes_seen=2150 last_return=-145.3 (+1 eps) [worker 1] episodes_seen=2150 last_return=-231.6 (+1 eps) [worker 0] episodes_seen=2180 last_return=-186.0 (+1 eps) [worker 2] episodes_seen=2160 last_return=-253.9 (+1 eps) [worker 1] episodes_seen=2160 last_return=-188.8 (+1 eps) [worker 0] episodes_seen=2190 last_return=-149.6 (+1 eps) [worker 2] episodes_seen=2170 last_return=-134.6 (+1 eps) [worker 1] episodes_seen=2170 last_return=-188.2 (+1 eps) [worker 2] episodes_seen=2180 last_return=-155.9 (+1 eps) [worker 0] episodes_seen=2200 last_return=-229.9 (+1 eps) [worker 1] episodes_seen=2180 last_return=-248.7 (+1 eps) [worker 0] episodes_seen=2210 last_return=-214.7 (+1 eps) [worker 2] episodes_seen=2190 last_return=-148.5 (+1 eps) [worker 1] episodes_seen=2190 last_return=-364.4 (+1 eps) [worker 0] episodes_seen=2220 last_return=-219.7 (+1 eps) [worker 2] episodes_seen=2200 last_return=-216.6 (+1 eps) [worker 1] episodes_seen=2200 last_return=-141.4 (+1 eps) [worker 2] episodes_seen=2210 last_return=-235.2 (+1 eps) [worker 0] episodes_seen=2230 last_return=-302.5 (+1 eps) [worker 1] episodes_seen=2210 last_return=-217.8 (+1 eps) [worker 2] episodes_seen=2220 last_return=-147.0 (+1 eps) [worker 0] episodes_seen=2240 last_return=-160.1 (+1 eps) [worker 2] episodes_seen=2230 last_return=-120.2 (+1 eps) [worker 0] episodes_seen=2250 last_return=-291.4 (+1 eps) [worker 1] episodes_seen=2220 last_return=-196.3 (+1 eps) [worker 2] episodes_seen=2240 last_return=-201.9 (+1 eps) [worker 1] episodes_seen=2230 last_return=-112.5 (+1 eps) [worker 0] episodes_seen=2260 last_return=-122.5 (+1 eps) 
[worker 2] episodes_seen=2250 last_return=-172.7 (+1 eps) [worker 1] episodes_seen=2240 last_return=-213.2 (+1 eps) [worker 0] episodes_seen=2270 last_return=-310.0 (+1 eps) [A2C][sync] it=10671 steps= 800325 (+ 75) avg10=-245.36 loss=210.305 pg=-0.000 vf=350.508 H=0.000 gn=4371.502 [worker 2] episodes_seen=2260 last_return=-120.8 (+1 eps) [worker 1] episodes_seen=2250 last_return=-271.1 (+1 eps) [worker 0] episodes_seen=2280 last_return=-195.5 (+1 eps) [worker 2] episodes_seen=2270 last_return=-319.3 (+1 eps) [worker 1] episodes_seen=2260 last_return=-234.3 (+1 eps) [worker 0] episodes_seen=2290 last_return=-253.9 (+1 eps) [worker 2] episodes_seen=2280 last_return=-175.4 (+1 eps) [worker 1] episodes_seen=2270 last_return=-128.6 (+1 eps) [worker 0] episodes_seen=2300 last_return=-302.7 (+1 eps) [worker 2] episodes_seen=2290 last_return=-151.8 (+1 eps) [worker 1] episodes_seen=2280 last_return=-290.5 (+1 eps) [worker 0] episodes_seen=2310 last_return=-192.4 (+1 eps) [worker 2] episodes_seen=2300 last_return=-191.1 (+1 eps) [worker 0] episodes_seen=2320 last_return=-222.6 (+1 eps) [worker 1] episodes_seen=2290 last_return=-221.8 (+1 eps) [worker 2] episodes_seen=2310 last_return=-157.3 (+1 eps) [worker 0] episodes_seen=2330 last_return=-262.4 (+1 eps) [worker 1] episodes_seen=2300 last_return=-157.8 (+1 eps) [worker 2] episodes_seen=2320 last_return=-286.6 (+1 eps) [worker 0] episodes_seen=2340 last_return=-231.0 (+1 eps) [worker 1] episodes_seen=2310 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=2330 last_return=-203.1 (+1 eps) [worker 0] episodes_seen=2350 last_return=-234.9 (+1 eps) [worker 1] episodes_seen=2320 last_return=-282.7 (+1 eps) [worker 2] episodes_seen=2340 last_return=-223.3 (+1 eps) [worker 0] episodes_seen=2360 last_return=-165.6 (+1 eps) [worker 1] episodes_seen=2330 last_return=-231.4 (+1 eps) [worker 2] episodes_seen=2350 last_return=-124.8 (+1 eps) [worker 0] episodes_seen=2370 last_return=-188.8 (+1 eps) [worker 1] episodes_seen=2340 
last_return=-268.5 (+1 eps) [worker 2] episodes_seen=2360 last_return=-463.1 (+1 eps) [worker 1] episodes_seen=2350 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2380 last_return=-159.6 (+1 eps) [worker 1] episodes_seen=2360 last_return=-284.1 (+1 eps) [worker 2] episodes_seen=2370 last_return=-188.1 (+1 eps) [worker 0] episodes_seen=2390 last_return=-218.9 (+1 eps) [worker 1] episodes_seen=2370 last_return=-224.0 (+1 eps) [worker 2] episodes_seen=2380 last_return=-119.1 (+1 eps) [worker 0] episodes_seen=2400 last_return=-114.8 (+1 eps) [worker 1] episodes_seen=2380 last_return=-239.1 (+1 eps) [worker 2] episodes_seen=2390 last_return=-237.9 (+1 eps) [worker 0] episodes_seen=2410 last_return=-220.2 (+1 eps) [worker 1] episodes_seen=2390 last_return=-249.8 (+1 eps) [worker 2] episodes_seen=2400 last_return=-213.2 (+1 eps) [worker 0] episodes_seen=2420 last_return=-142.0 (+1 eps) [worker 2] episodes_seen=2410 last_return=-113.4 (+1 eps) [worker 1] episodes_seen=2400 last_return=-279.6 (+1 eps) [worker 0] episodes_seen=2430 last_return=-159.3 (+1 eps) [worker 1] episodes_seen=2410 last_return=-136.0 (+1 eps) [worker 2] episodes_seen=2420 last_return=-214.6 (+1 eps) [worker 0] episodes_seen=2440 last_return=-186.7 (+1 eps) [worker 1] episodes_seen=2420 last_return=-237.6 (+1 eps) [worker 2] episodes_seen=2430 last_return=-384.5 (+1 eps) [worker 0] episodes_seen=2450 last_return=-146.9 (+1 eps) [worker 2] episodes_seen=2440 last_return=-192.7 (+1 eps) [worker 1] episodes_seen=2430 last_return=-163.5 (+1 eps) [worker 0] episodes_seen=2460 last_return=-149.6 (+1 eps) [worker 2] episodes_seen=2450 last_return=-288.2 (+1 eps) [worker 1] episodes_seen=2440 last_return=-132.7 (+1 eps) [worker 0] episodes_seen=2470 last_return=-221.2 (+1 eps) [worker 2] episodes_seen=2460 last_return=-312.8 (+1 eps) [worker 1] episodes_seen=2450 last_return=-297.8 (+1 eps) [worker 0] episodes_seen=2480 last_return=-258.0 (+1 eps) [A2C][sync] it=11738 steps= 880350 (+ 75) avg10=-228.88 
loss=877.057 pg=-0.000 vf=1461.761 H=0.000 gn=25689.371 [worker 2] episodes_seen=2470 last_return=-285.6 (+1 eps) [worker 1] episodes_seen=2460 last_return=-233.2 (+1 eps) [worker 0] episodes_seen=2490 last_return=-289.2 (+1 eps) [worker 2] episodes_seen=2480 last_return=-136.8 (+1 eps) [worker 1] episodes_seen=2470 last_return=-295.2 (+1 eps) [worker 0] episodes_seen=2500 last_return=-256.5 (+1 eps) [worker 2] episodes_seen=2490 last_return=-243.4 (+1 eps) [worker 1] episodes_seen=2480 last_return=-187.3 (+1 eps) [worker 0] episodes_seen=2510 last_return=-111.6 (+1 eps) [worker 2] episodes_seen=2500 last_return=-175.8 (+1 eps) [worker 0] episodes_seen=2520 last_return=-185.9 (+1 eps) [worker 1] episodes_seen=2490 last_return=-204.8 (+1 eps) [worker 2] episodes_seen=2510 last_return=-260.1 (+1 eps) [worker 0] episodes_seen=2530 last_return=-295.6 (+1 eps) [worker 1] episodes_seen=2500 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=2520 last_return=-234.5 (+1 eps) [worker 1] episodes_seen=2510 last_return=-129.1 (+1 eps) [worker 0] episodes_seen=2540 last_return=-160.4 (+1 eps) [worker 2] episodes_seen=2530 last_return=-192.9 (+1 eps) [worker 1] episodes_seen=2520 last_return=-261.8 (+1 eps) [worker 2] episodes_seen=2540 last_return=-108.3 (+1 eps) [worker 0] episodes_seen=2550 last_return=-159.9 (+1 eps) [worker 1] episodes_seen=2530 last_return=-174.9 (+1 eps) [worker 2] episodes_seen=2550 last_return=-257.4 (+1 eps) [worker 0] episodes_seen=2560 last_return=-239.1 (+1 eps) [worker 1] episodes_seen=2540 last_return=-178.2 (+1 eps) [worker 2] episodes_seen=2560 last_return=-188.1 (+1 eps) [worker 0] episodes_seen=2570 last_return=-149.1 (+1 eps) [worker 1] episodes_seen=2550 last_return=-302.8 (+1 eps) [worker 2] episodes_seen=2570 last_return=-202.1 (+1 eps) [worker 1] episodes_seen=2560 last_return=-139.1 (+1 eps) [worker 0] episodes_seen=2580 last_return=-145.7 (+1 eps) [worker 2] episodes_seen=2580 last_return=-179.7 (+1 eps) [worker 0] episodes_seen=2590 
last_return=-210.9 (+1 eps) [worker 1] episodes_seen=2570 last_return=-161.6 (+1 eps) [worker 2] episodes_seen=2590 last_return=-173.2 (+1 eps) [worker 0] episodes_seen=2600 last_return=-328.3 (+1 eps) [worker 1] episodes_seen=2580 last_return=-108.3 (+1 eps) [worker 2] episodes_seen=2600 last_return=-279.7 (+1 eps) [worker 1] episodes_seen=2590 last_return=-195.7 (+1 eps) [worker 0] episodes_seen=2610 last_return=-256.4 (+1 eps) [worker 2] episodes_seen=2610 last_return=-303.2 (+1 eps) [worker 1] episodes_seen=2600 last_return=-230.9 (+1 eps) [worker 0] episodes_seen=2620 last_return=-265.8 (+1 eps) [worker 2] episodes_seen=2620 last_return=-151.3 (+1 eps) [worker 0] episodes_seen=2630 last_return=-178.5 (+1 eps) [worker 1] episodes_seen=2610 last_return=-226.4 (+1 eps) [worker 2] episodes_seen=2630 last_return=-250.5 (+1 eps) [worker 0] episodes_seen=2640 last_return=-182.0 (+1 eps) [worker 1] episodes_seen=2620 last_return=-126.9 (+1 eps) [worker 2] episodes_seen=2640 last_return=-106.9 (+1 eps) [worker 1] episodes_seen=2630 last_return=-267.0 (+1 eps) [worker 0] episodes_seen=2650 last_return=-228.2 (+1 eps) [worker 2] episodes_seen=2650 last_return=-249.2 (+1 eps) [worker 1] episodes_seen=2640 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=2660 last_return=-489.7 (+1 eps) [worker 2] episodes_seen=2660 last_return=-71.1 (+1 eps) [worker 1] episodes_seen=2650 last_return=-93.5 (+1 eps) [worker 0] episodes_seen=2670 last_return=-100.5 (+1 eps) [worker 1] episodes_seen=2660 last_return=-63.3 (+1 eps) [worker 2] episodes_seen=2670 last_return=-80.3 (+1 eps) [worker 0] episodes_seen=2680 last_return=-77.0 (+1 eps) [worker 2] episodes_seen=2680 last_return=-74.8 (+1 eps) [worker 1] episodes_seen=2670 last_return=-78.6 (+1 eps) [worker 0] episodes_seen=2690 last_return=-54.5 (+1 eps) [worker 2] episodes_seen=2690 last_return=-85.9 (+1 eps) [worker 1] episodes_seen=2680 last_return=-98.1 (+1 eps) [worker 0] episodes_seen=2700 last_return=-86.8 (+1 eps) 
[A2C][sync] it=12805 steps= 960375 (+ 75) avg10= -87.63 loss=589.149 pg=0.011 vf=981.903 H=0.355 gn=4181.048 [worker 2] episodes_seen=2700 last_return=-97.3 (+1 eps) [worker 1] episodes_seen=2690 last_return=-71.5 (+1 eps) [worker 0] episodes_seen=2710 last_return=-85.0 (+1 eps) [worker 2] episodes_seen=2710 last_return=-40.5 (+1 eps) [worker 1] episodes_seen=2700 last_return=-83.3 (+1 eps) [worker 0] episodes_seen=2720 last_return=-87.6 (+1 eps) [worker 1] episodes_seen=2710 last_return=-66.5 (+1 eps) [worker 2] episodes_seen=2720 last_return=-92.2 (+1 eps) [worker 0] episodes_seen=2730 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=2730 last_return=-105.7 (+1 eps) [worker 1] episodes_seen=2720 last_return=-59.0 (+1 eps) [worker 0] episodes_seen=2740 last_return=-102.0 (+1 eps) [worker 2] episodes_seen=2740 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2730 last_return=-76.5 (+1 eps) [worker 0] episodes_seen=2750 last_return=-84.9 (+1 eps) [worker 2] episodes_seen=2750 last_return=-147.1 (+1 eps) [worker 1] episodes_seen=2740 last_return=-78.5 (+1 eps) [worker 0] episodes_seen=2760 last_return=-94.5 (+1 eps) [worker 2] episodes_seen=2760 last_return=-80.1 (+1 eps) [worker 1] episodes_seen=2750 last_return=-106.1 (+1 eps) [worker 0] episodes_seen=2770 last_return=-163.1 (+1 eps) [worker 2] episodes_seen=2770 last_return=-78.2 (+1 eps) [worker 1] episodes_seen=2760 last_return=-74.9 (+1 eps) [worker 0] episodes_seen=2780 last_return=-75.1 (+1 eps) [worker 2] episodes_seen=2780 last_return=-79.7 (+1 eps) [worker 1] episodes_seen=2770 last_return=-112.4 (+1 eps) [worker 0] episodes_seen=2790 last_return=-72.1 (+1 eps) [worker 1] episodes_seen=2780 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=2790 last_return=-137.3 (+1 eps) [worker 0] episodes_seen=2800 last_return=-90.4 (+1 eps) [worker 1] episodes_seen=2790 last_return=-206.6 (+1 eps) [worker 2] episodes_seen=2800 last_return=-119.1 (+1 eps) [worker 0] episodes_seen=2810 last_return=-189.4 (+1 
eps) [worker 1] episodes_seen=2800 last_return=-153.0 (+1 eps) [worker 2] episodes_seen=2810 last_return=-120.4 (+1 eps) [worker 0] episodes_seen=2820 last_return=-116.9 (+1 eps) [worker 1] episodes_seen=2810 last_return=-61.1 (+1 eps) [worker 2] episodes_seen=2820 last_return=-138.6 (+1 eps) [worker 0] episodes_seen=2830 last_return=-169.0 (+1 eps) [worker 1] episodes_seen=2820 last_return=-69.0 (+1 eps) [worker 2] episodes_seen=2830 last_return=-101.7 (+1 eps) [worker 0] episodes_seen=2840 last_return=-164.2 (+1 eps) [worker 1] episodes_seen=2830 last_return=-107.4 (+1 eps) [worker 2] episodes_seen=2840 last_return=-68.2 (+1 eps) [worker 0] episodes_seen=2850 last_return=-150.3 (+1 eps) [worker 1] episodes_seen=2840 last_return=-257.9 (+1 eps) [worker 2] episodes_seen=2850 last_return=-129.1 (+1 eps) [worker 1] episodes_seen=2850 last_return=-210.3 (+1 eps) [worker 0] episodes_seen=2860 last_return=-229.9 (+1 eps) [worker 2] episodes_seen=2860 last_return=-163.3 (+1 eps) [worker 1] episodes_seen=2860 last_return=-153.0 (+1 eps) [worker 0] episodes_seen=2870 last_return=-316.7 (+1 eps) [worker 2] episodes_seen=2870 last_return=-163.5 (+1 eps) [worker 0] episodes_seen=2880 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2870 last_return=-219.8 (+1 eps) [worker 2] episodes_seen=2880 last_return=-344.8 (+1 eps) [worker 0] episodes_seen=2890 last_return=-110.2 (+1 eps) [worker 1] episodes_seen=2880 last_return=-296.3 (+1 eps) [worker 2] episodes_seen=2890 last_return=-208.3 (+1 eps) [worker 0] episodes_seen=2900 last_return=-266.1 (+1 eps) [worker 1] episodes_seen=2890 last_return=-151.2 (+1 eps) [worker 2] episodes_seen=2900 last_return=-180.5 (+1 eps) [worker 0] episodes_seen=2910 last_return=-179.9 (+1 eps) [worker 1] episodes_seen=2900 last_return=-146.1 (+1 eps) [worker 2] episodes_seen=2910 last_return=-150.4 (+1 eps) [worker 0] episodes_seen=2920 last_return=-382.0 (+1 eps) [worker 1] episodes_seen=2910 last_return=-250.4 (+1 eps) [worker 2] 
episodes_seen=2920 last_return=-272.7 (+1 eps) [worker 0] episodes_seen=2930 last_return=-208.7 (+1 eps) [worker 1] episodes_seen=2920 last_return=-228.1 (+1 eps) [worker 2] episodes_seen=2930 last_return=-113.9 (+1 eps) [worker 1] episodes_seen=2930 last_return=-210.8 (+1 eps) [worker 0] episodes_seen=2940 last_return=-155.8 (+1 eps) [worker 2] episodes_seen=2940 last_return=-281.9 (+1 eps) [A2C][sync] it=13872 steps= 1040400 (+ 75) avg10=-213.80 loss=660.255 pg=-0.001 vf=1100.426 H=0.008 gn=10315.442 [worker 1] episodes_seen=2940 last_return=-300.8 (+1 eps) [worker 0] episodes_seen=2950 last_return=-134.1 (+1 eps) [worker 0] episodes_seen=2960 last_return=-297.9 (+1 eps) [worker 2] episodes_seen=2950 last_return=-213.7 (+1 eps) [worker 1] episodes_seen=2950 last_return=-308.4 (+1 eps) [worker 2] episodes_seen=2960 last_return=-159.2 (+1 eps) [worker 0] episodes_seen=2970 last_return=-230.0 (+1 eps) [worker 1] episodes_seen=2960 last_return=-330.7 (+1 eps) [worker 1] episodes_seen=2970 last_return=-179.6 (+1 eps) [worker 2] episodes_seen=2970 last_return=-181.4 (+1 eps) [worker 0] episodes_seen=2980 last_return=-208.2 (+1 eps) [worker 1] episodes_seen=2980 last_return=-262.4 (+1 eps) [worker 2] episodes_seen=2980 last_return=-185.6 (+1 eps) [worker 0] episodes_seen=2990 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=2990 last_return=-182.2 (+1 eps) [worker 2] episodes_seen=2990 last_return=-219.8 (+1 eps) [worker 0] episodes_seen=3000 last_return=-114.9 (+1 eps) [worker 2] episodes_seen=3000 last_return=-246.4 (+1 eps) [worker 1] episodes_seen=3000 last_return=-359.8 (+1 eps) [worker 0] episodes_seen=3010 last_return=-297.7 (+1 eps) [worker 0] episodes_seen=3020 last_return=-267.6 (+1 eps) [worker 1] episodes_seen=3010 last_return=-110.6 (+1 eps) [worker 2] episodes_seen=3010 last_return=-122.8 (+1 eps) [worker 1] episodes_seen=3020 last_return=-221.1 (+1 eps) [worker 0] episodes_seen=3030 last_return=-227.1 (+1 eps) [worker 2] episodes_seen=3020 
last_return=-124.2 (+1 eps) [worker 1] episodes_seen=3030 last_return=-92.6 (+1 eps) [worker 0] episodes_seen=3040 last_return=-81.4 (+1 eps) [worker 2] episodes_seen=3030 last_return=-96.3 (+1 eps) [worker 1] episodes_seen=3040 last_return=-98.9 (+1 eps) [worker 0] episodes_seen=3050 last_return=-86.0 (+1 eps) [worker 2] episodes_seen=3040 last_return=-84.6 (+1 eps) [worker 1] episodes_seen=3050 last_return=-97.6 (+1 eps) [worker 0] episodes_seen=3060 last_return=-73.4 (+1 eps) [worker 2] episodes_seen=3050 last_return=-99.7 (+1 eps) [worker 1] episodes_seen=3060 last_return=-107.1 (+1 eps) [worker 0] episodes_seen=3070 last_return=-144.7 (+1 eps) [worker 2] episodes_seen=3060 last_return=-78.2 (+1 eps) [worker 1] episodes_seen=3070 last_return=-111.2 (+1 eps) [worker 0] episodes_seen=3080 last_return=-126.1 (+1 eps) [worker 2] episodes_seen=3070 last_return=-62.0 (+1 eps) [worker 1] episodes_seen=3080 last_return=-91.6 (+1 eps) [worker 0] episodes_seen=3090 last_return=-74.8 (+1 eps) [worker 2] episodes_seen=3080 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=3090 last_return=-107.0 (+1 eps) [worker 0] episodes_seen=3100 last_return=-60.8 (+1 eps) [worker 2] episodes_seen=3090 last_return=44.3 (+1 eps) [worker 1] episodes_seen=3100 last_return=-115.1 (+1 eps) [worker 0] episodes_seen=3110 last_return=-86.7 (+1 eps) [worker 2] episodes_seen=3100 last_return=-62.7 (+1 eps) [worker 1] episodes_seen=3110 last_return=-84.9 (+1 eps) [worker 0] episodes_seen=3120 last_return=-99.2 (+1 eps) [worker 2] episodes_seen=3110 last_return=-63.1 (+1 eps) [worker 1] episodes_seen=3120 last_return=-98.8 (+1 eps) [worker 0] episodes_seen=3130 last_return=-100.9 (+1 eps) [worker 2] episodes_seen=3120 last_return=-110.5 (+1 eps) [worker 1] episodes_seen=3130 last_return=-77.3 (+1 eps) [worker 0] episodes_seen=3140 last_return=-89.7 (+1 eps) [worker 2] episodes_seen=3130 last_return=-96.2 (+1 eps) [worker 1] episodes_seen=3140 last_return=-84.3 (+1 eps) [worker 0] 
episodes_seen=3150 last_return=-76.6 (+1 eps) [worker 2] episodes_seen=3140 last_return=-68.3 (+1 eps) [worker 1] episodes_seen=3150 last_return=-212.9 (+1 eps) [worker 2] episodes_seen=3150 last_return=-90.7 (+1 eps) [worker 0] episodes_seen=3160 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=3160 last_return=-73.7 (+1 eps) [worker 2] episodes_seen=3160 last_return=-5.6 (+1 eps) [worker 0] episodes_seen=3170 last_return=-116.4 (+1 eps) [worker 1] episodes_seen=3170 last_return=-63.3 (+1 eps) [worker 2] episodes_seen=3170 last_return=-119.7 (+1 eps) [worker 0] episodes_seen=3180 last_return=-113.2 (+1 eps) [worker 1] episodes_seen=3180 last_return=-92.4 (+1 eps) [worker 0] episodes_seen=3190 last_return=-89.8 (+1 eps) [worker 2] episodes_seen=3180 last_return=-104.4 (+1 eps) [worker 1] episodes_seen=3190 last_return=-109.7 (+1 eps) [worker 0] episodes_seen=3200 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=3190 last_return=-133.3 (+1 eps) [worker 1] episodes_seen=3200 last_return=-142.0 (+1 eps) [worker 0] episodes_seen=3210 last_return=-189.4 (+1 eps) [worker 2] episodes_seen=3200 last_return=-164.7 (+1 eps) [A2C][sync] it=14939 steps= 1120425 (+ 75) avg10=-138.15 loss=3.949 pg=-0.114 vf=6.779 H=0.455 gn=288.734 [worker 2] episodes_seen=3210 last_return=-80.8 (+1 eps) [worker 0] episodes_seen=3220 last_return=-63.9 (+1 eps) [worker 1] episodes_seen=3210 last_return=-88.5 (+1 eps) [worker 0] episodes_seen=3230 last_return=-63.4 (+1 eps) [worker 2] episodes_seen=3220 last_return=-109.5 (+1 eps) [worker 1] episodes_seen=3220 last_return=-170.7 (+1 eps) [worker 0] episodes_seen=3240 last_return=-97.4 (+1 eps) [worker 1] episodes_seen=3230 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=3230 last_return=-101.0 (+1 eps) [worker 0] episodes_seen=3250 last_return=-109.3 (+1 eps) [worker 2] episodes_seen=3240 last_return=-93.9 (+1 eps) [worker 1] episodes_seen=3240 last_return=-114.8 (+1 eps) [worker 2] episodes_seen=3250 last_return=-76.4 (+1 eps) 
[worker 0] episodes_seen=3260 last_return=-98.0 (+1 eps) [worker 1] episodes_seen=3250 last_return=-102.1 (+1 eps) [worker 1] episodes_seen=3260 last_return=-78.8 (+1 eps) [worker 2] episodes_seen=3260 last_return=-93.1 (+1 eps) [worker 0] episodes_seen=3270 last_return=42.8 (+1 eps) [worker 1] episodes_seen=3270 last_return=-88.6 (+1 eps) [worker 0] episodes_seen=3280 last_return=-96.9 (+1 eps) [worker 2] episodes_seen=3270 last_return=-73.3 (+1 eps) [worker 1] episodes_seen=3280 last_return=-93.2 (+1 eps) [worker 0] episodes_seen=3290 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=3280 last_return=-95.7 (+1 eps) [worker 1] episodes_seen=3290 last_return=-73.6 (+1 eps) [worker 0] episodes_seen=3300 last_return=-75.9 (+1 eps) [worker 2] episodes_seen=3290 last_return=-157.3 (+1 eps) [worker 1] episodes_seen=3300 last_return=-85.6 (+1 eps) [worker 2] episodes_seen=3300 last_return=-56.4 (+1 eps) [worker 0] episodes_seen=3310 last_return=40.6 (+1 eps) [worker 1] episodes_seen=3310 last_return=-88.4 (+1 eps) [worker 2] episodes_seen=3310 last_return=-75.9 (+1 eps) [worker 0] episodes_seen=3320 last_return=-87.9 (+1 eps) [worker 1] episodes_seen=3320 last_return=-355.6 (+1 eps) [worker 2] episodes_seen=3320 last_return=-71.0 (+1 eps) [worker 0] episodes_seen=3330 last_return=-73.9 (+1 eps) [worker 1] episodes_seen=3330 last_return=-87.3 (+1 eps) [worker 2] episodes_seen=3330 last_return=-94.2 (+1 eps) [worker 0] episodes_seen=3340 last_return=-46.2 (+1 eps) [worker 1] episodes_seen=3340 last_return=-149.6 (+1 eps) [worker 2] episodes_seen=3340 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=3350 last_return=-85.9 (+1 eps) [worker 1] episodes_seen=3350 last_return=-74.1 (+1 eps) [worker 2] episodes_seen=3350 last_return=-117.1 (+1 eps) [worker 0] episodes_seen=3360 last_return=-129.8 (+1 eps) [worker 1] episodes_seen=3360 last_return=-70.4 (+1 eps) [worker 2] episodes_seen=3360 last_return=-141.1 (+1 eps) [worker 0] episodes_seen=3370 last_return=-38.3 (+1 
eps) [worker 1] episodes_seen=3370 last_return=-104.0 (+1 eps) [worker 2] episodes_seen=3370 last_return=-133.7 (+1 eps) [worker 0] episodes_seen=3380 last_return=-61.0 (+1 eps) [worker 1] episodes_seen=3380 last_return=-79.0 (+1 eps) [worker 2] episodes_seen=3380 last_return=79.6 (+1 eps) [worker 0] episodes_seen=3390 last_return=-115.2 (+1 eps) [worker 1] episodes_seen=3390 last_return=-122.4 (+1 eps) [worker 2] episodes_seen=3390 last_return=-131.7 (+1 eps) [worker 0] episodes_seen=3400 last_return=-120.2 (+1 eps) [worker 1] episodes_seen=3400 last_return=-64.0 (+1 eps) [worker 2] episodes_seen=3400 last_return=-85.4 (+1 eps) [worker 0] episodes_seen=3410 last_return=-117.2 (+1 eps) [worker 1] episodes_seen=3410 last_return=-62.5 (+1 eps) [worker 2] episodes_seen=3410 last_return=-0.6 (+1 eps) [worker 0] episodes_seen=3420 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=3420 last_return=-95.6 (+1 eps) [worker 2] episodes_seen=3420 last_return=-172.4 (+1 eps) [worker 0] episodes_seen=3430 last_return=-89.5 (+1 eps) [worker 1] episodes_seen=3430 last_return=-85.2 (+1 eps) [worker 2] episodes_seen=3430 last_return=-125.9 (+1 eps) [worker 0] episodes_seen=3440 last_return=-93.0 (+1 eps) [worker 1] episodes_seen=3440 last_return=-24.7 (+1 eps) [worker 2] episodes_seen=3440 last_return=-193.1 (+1 eps) [worker 0] episodes_seen=3450 last_return=-91.0 (+1 eps) [worker 1] episodes_seen=3450 last_return=-107.0 (+1 eps) [worker 2] episodes_seen=3450 last_return=-104.4 (+1 eps) [worker 0] episodes_seen=3460 last_return=-75.6 (+1 eps) [worker 1] episodes_seen=3460 last_return=-100.9 (+1 eps) [worker 2] episodes_seen=3460 last_return=-74.7 (+1 eps) [worker 0] episodes_seen=3470 last_return=-162.4 (+1 eps) [worker 1] episodes_seen=3470 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=3470 last_return=-212.6 (+1 eps) [worker 0] episodes_seen=3480 last_return=-93.6 (+1 eps) [worker 1] episodes_seen=3480 last_return=-102.4 (+1 eps) [worker 2] episodes_seen=3480 
last_return=-116.3 (+1 eps) [worker 1] episodes_seen=3490 last_return=-79.1 (+1 eps) [worker 2] episodes_seen=3490 last_return=-45.2 (+1 eps) [worker 0] episodes_seen=3490 last_return=-85.7 (+1 eps) [A2C][sync] it=16006 steps= 1200450 (+ 75) avg10= -81.43 loss=6.233 pg=0.040 vf=10.330 H=0.494 gn=156.228 [worker 1] episodes_seen=3500 last_return=-150.8 (+1 eps) [worker 2] episodes_seen=3500 last_return=-126.5 (+1 eps) [worker 0] episodes_seen=3500 last_return=-86.0 (+1 eps) [worker 2] episodes_seen=3510 last_return=-87.9 (+1 eps) [worker 1] episodes_seen=3510 last_return=-81.8 (+1 eps) [worker 0] episodes_seen=3510 last_return=-92.5 (+1 eps) [worker 2] episodes_seen=3520 last_return=-66.2 (+1 eps) [worker 1] episodes_seen=3520 last_return=-179.4 (+1 eps) [worker 0] episodes_seen=3520 last_return=-136.0 (+1 eps) [worker 2] episodes_seen=3530 last_return=-111.5 (+1 eps) [worker 0] episodes_seen=3530 last_return=-277.1 (+1 eps) [worker 1] episodes_seen=3530 last_return=-130.2 (+1 eps) [worker 2] episodes_seen=3540 last_return=-98.2 (+1 eps) [worker 1] episodes_seen=3540 last_return=-87.6 (+1 eps) [worker 0] episodes_seen=3540 last_return=-89.7 (+1 eps) [worker 2] episodes_seen=3550 last_return=-69.7 (+1 eps) [worker 1] episodes_seen=3550 last_return=-122.7 (+1 eps) [worker 0] episodes_seen=3550 last_return=-100.8 (+1 eps) [worker 2] episodes_seen=3560 last_return=-93.6 (+1 eps) [worker 0] episodes_seen=3560 last_return=-75.1 (+1 eps) [worker 1] episodes_seen=3560 last_return=-92.5 (+1 eps) [worker 2] episodes_seen=3570 last_return=-81.9 (+1 eps) [worker 0] episodes_seen=3570 last_return=-84.4 (+1 eps) [worker 1] episodes_seen=3570 last_return=-96.5 (+1 eps) [worker 2] episodes_seen=3580 last_return=-161.9 (+1 eps) [worker 1] episodes_seen=3580 last_return=-100.9 (+1 eps) [worker 0] episodes_seen=3580 last_return=-42.4 (+1 eps) [worker 2] episodes_seen=3590 last_return=-106.2 (+1 eps) [worker 1] episodes_seen=3590 last_return=45.7 (+1 eps) [worker 0] episodes_seen=3590 
last_return=-103.6 (+1 eps) [worker 2] episodes_seen=3600 last_return=-88.6 (+1 eps) [worker 1] episodes_seen=3600 last_return=-99.1 (+1 eps) [worker 0] episodes_seen=3600 last_return=-94.2 (+1 eps) [worker 2] episodes_seen=3610 last_return=-96.6 (+1 eps) [worker 1] episodes_seen=3610 last_return=-76.7 (+1 eps) [worker 0] episodes_seen=3610 last_return=-118.2 (+1 eps) [worker 2] episodes_seen=3620 last_return=-29.3 (+1 eps) [worker 1] episodes_seen=3620 last_return=-93.8 (+1 eps) [worker 0] episodes_seen=3620 last_return=-97.7 (+1 eps) [worker 2] episodes_seen=3630 last_return=-66.9 (+1 eps) [worker 1] episodes_seen=3630 last_return=-104.9 (+1 eps) [worker 0] episodes_seen=3630 last_return=-84.5 (+1 eps) [worker 2] episodes_seen=3640 last_return=-93.5 (+1 eps) [worker 1] episodes_seen=3640 last_return=-82.3 (+1 eps) [worker 0] episodes_seen=3640 last_return=-139.2 (+1 eps) [worker 2] episodes_seen=3650 last_return=-74.9 (+1 eps) [worker 1] episodes_seen=3650 last_return=-94.2 (+1 eps) [worker 0] episodes_seen=3650 last_return=-92.4 (+1 eps) [worker 1] episodes_seen=3660 last_return=-75.9 (+1 eps) [worker 2] episodes_seen=3660 last_return=-89.4 (+1 eps) [worker 0] episodes_seen=3660 last_return=-103.2 (+1 eps) [worker 2] episodes_seen=3670 last_return=-92.8 (+1 eps) [worker 1] episodes_seen=3670 last_return=-76.1 (+1 eps) [worker 0] episodes_seen=3670 last_return=-85.4 (+1 eps) [worker 1] episodes_seen=3680 last_return=-88.3 (+1 eps) [worker 2] episodes_seen=3680 last_return=-110.2 (+1 eps) [worker 0] episodes_seen=3680 last_return=-136.6 (+1 eps) [worker 1] episodes_seen=3690 last_return=-103.6 (+1 eps) [worker 2] episodes_seen=3690 last_return=-111.3 (+1 eps) [worker 0] episodes_seen=3690 last_return=-164.3 (+1 eps) [worker 1] episodes_seen=3700 last_return=-95.4 (+1 eps) [worker 2] episodes_seen=3700 last_return=-114.1 (+1 eps) [worker 1] episodes_seen=3710 last_return=-68.0 (+1 eps) [worker 0] episodes_seen=3700 last_return=-122.8 (+1 eps) [worker 2] 
episodes_seen=3710 last_return=-88.9 (+1 eps) [worker 1] episodes_seen=3720 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=3710 last_return=-86.7 (+1 eps) [worker 2] episodes_seen=3720 last_return=4.9 (+1 eps) [worker 1] episodes_seen=3730 last_return=-77.9 (+1 eps) [worker 0] episodes_seen=3720 last_return=-62.9 (+1 eps) [worker 2] episodes_seen=3730 last_return=-70.0 (+1 eps) [worker 1] episodes_seen=3740 last_return=-89.2 (+1 eps) [worker 0] episodes_seen=3730 last_return=-89.5 (+1 eps) [worker 2] episodes_seen=3740 last_return=-99.7 (+1 eps) [worker 0] episodes_seen=3740 last_return=-92.1 (+1 eps) [worker 2] episodes_seen=3750 last_return=-81.5 (+1 eps) [worker 1] episodes_seen=3750 last_return=-113.3 (+1 eps) [worker 0] episodes_seen=3750 last_return=-83.2 (+1 eps) [worker 2] episodes_seen=3760 last_return=-80.6 (+1 eps) [worker 1] episodes_seen=3760 last_return=-61.5 (+1 eps) [A2C][sync] it=17073 steps= 1280475 (+ 75) avg10=-104.06 loss=33.070 pg=-0.014 vf=55.142 H=0.175 gn=627.232 [worker 2] episodes_seen=3770 last_return=-152.8 (+1 eps) [worker 0] episodes_seen=3760 last_return=-108.5 (+1 eps) [worker 1] episodes_seen=3770 last_return=-119.9 (+1 eps) [worker 2] episodes_seen=3780 last_return=-98.8 (+1 eps) [worker 0] episodes_seen=3770 last_return=-83.3 (+1 eps) [worker 1] episodes_seen=3780 last_return=-40.0 (+1 eps) [worker 2] episodes_seen=3790 last_return=-119.3 (+1 eps) [worker 0] episodes_seen=3780 last_return=-125.6 (+1 eps) [worker 1] episodes_seen=3790 last_return=-82.8 (+1 eps) [worker 2] episodes_seen=3800 last_return=-114.6 (+1 eps) [worker 0] episodes_seen=3790 last_return=-68.9 (+1 eps) [worker 1] episodes_seen=3800 last_return=-93.0 (+1 eps) [worker 0] episodes_seen=3800 last_return=-87.7 (+1 eps) [worker 2] episodes_seen=3810 last_return=-48.0 (+1 eps) [worker 1] episodes_seen=3810 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=3810 last_return=-102.3 (+1 eps) [worker 1] episodes_seen=3820 last_return=-63.0 (+1 eps) [worker 2] 
episodes_seen=3820 last_return=-95.0 (+1 eps) [worker 0] episodes_seen=3820 last_return=-102.5 (+1 eps) [worker 2] episodes_seen=3830 last_return=36.3 (+1 eps) [worker 1] episodes_seen=3830 last_return=-32.2 (+1 eps) [worker 2] episodes_seen=3840 last_return=-99.6 (+1 eps) [worker 1] episodes_seen=3840 last_return=-29.1 (+1 eps) [worker 0] episodes_seen=3830 last_return=-102.2 (+1 eps) [worker 2] episodes_seen=3850 last_return=-115.5 (+1 eps) [worker 1] episodes_seen=3850 last_return=-94.3 (+1 eps) [worker 0] episodes_seen=3840 last_return=-30.1 (+1 eps) [worker 2] episodes_seen=3860 last_return=-101.0 (+1 eps) [worker 1] episodes_seen=3860 last_return=-65.1 (+1 eps) [worker 0] episodes_seen=3850 last_return=-108.7 (+1 eps) [worker 2] episodes_seen=3870 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=3870 last_return=-99.7 (+1 eps) [worker 0] episodes_seen=3860 last_return=-85.8 (+1 eps) [worker 2] episodes_seen=3880 last_return=-99.2 (+1 eps) [worker 1] episodes_seen=3880 last_return=-90.2 (+1 eps) [worker 0] episodes_seen=3870 last_return=-187.0 (+1 eps) [worker 1] episodes_seen=3890 last_return=18.3 (+1 eps) [worker 2] episodes_seen=3890 last_return=-81.0 (+1 eps) [worker 0] episodes_seen=3880 last_return=-94.9 (+1 eps) [worker 1] episodes_seen=3900 last_return=-43.7 (+1 eps) [worker 2] episodes_seen=3900 last_return=-101.8 (+1 eps) [worker 0] episodes_seen=3890 last_return=-84.6 (+1 eps) [worker 1] episodes_seen=3910 last_return=-69.5 (+1 eps) [worker 2] episodes_seen=3910 last_return=-77.4 (+1 eps) [worker 1] episodes_seen=3920 last_return=-101.5 (+1 eps) [worker 0] episodes_seen=3900 last_return=-106.2 (+1 eps) [worker 2] episodes_seen=3920 last_return=-130.4 (+1 eps) [worker 1] episodes_seen=3930 last_return=-101.9 (+1 eps) [worker 0] episodes_seen=3910 last_return=-89.6 (+1 eps) [worker 2] episodes_seen=3930 last_return=-109.6 (+1 eps) [worker 1] episodes_seen=3940 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=3920 last_return=-100.0 (+1 eps) 
[worker 2] episodes_seen=3940 last_return=-79.3 (+1 eps) [worker 1] episodes_seen=3950 last_return=-101.6 (+1 eps) [worker 0] episodes_seen=3930 last_return=-81.4 (+1 eps) [worker 2] episodes_seen=3950 last_return=-57.2 (+1 eps) [worker 1] episodes_seen=3960 last_return=-113.6 (+1 eps) [worker 0] episodes_seen=3940 last_return=-93.0 (+1 eps) [worker 2] episodes_seen=3960 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=3970 last_return=-111.3 (+1 eps) [worker 0] episodes_seen=3950 last_return=-87.8 (+1 eps) [worker 2] episodes_seen=3970 last_return=-94.2 (+1 eps) [worker 1] episodes_seen=3980 last_return=-111.5 (+1 eps) [worker 0] episodes_seen=3960 last_return=-81.5 (+1 eps) [worker 2] episodes_seen=3980 last_return=-188.6 (+1 eps) [worker 1] episodes_seen=3990 last_return=-84.0 (+1 eps) [worker 0] episodes_seen=3970 last_return=-87.5 (+1 eps) [worker 2] episodes_seen=3990 last_return=-94.0 (+1 eps) [worker 1] episodes_seen=4000 last_return=-109.4 (+1 eps) [worker 0] episodes_seen=3980 last_return=-102.0 (+1 eps) [worker 2] episodes_seen=4000 last_return=-104.0 (+1 eps) [worker 1] episodes_seen=4010 last_return=-105.9 (+1 eps) [worker 0] episodes_seen=3990 last_return=-158.2 (+1 eps) [worker 2] episodes_seen=4010 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=4020 last_return=-103.6 (+1 eps) [worker 0] episodes_seen=4000 last_return=-63.1 (+1 eps) [worker 2] episodes_seen=4020 last_return=-103.3 (+1 eps) [worker 1] episodes_seen=4030 last_return=-142.1 (+1 eps) [worker 0] episodes_seen=4010 last_return=-148.5 (+1 eps) [worker 2] episodes_seen=4030 last_return=-115.8 (+1 eps) [worker 1] episodes_seen=4040 last_return=-140.5 (+1 eps) [A2C][sync] it=18140 steps= 1360500 (+ 75) avg10=-163.30 loss=2892.341 pg=-0.115 vf=4820.770 H=0.633 gn=9680.794 [worker 2] episodes_seen=4040 last_return=-131.7 (+1 eps) [worker 0] episodes_seen=4020 last_return=-137.0 (+1 eps) [worker 1] episodes_seen=4050 last_return=-172.6 (+1 eps) [worker 2] episodes_seen=4050 
last_return=-105.8 (+1 eps) [worker 0] episodes_seen=4030 last_return=-189.6 (+1 eps) [worker 1] episodes_seen=4060 last_return=-123.8 (+1 eps) [worker 2] episodes_seen=4060 last_return=-154.3 (+1 eps) [worker 0] episodes_seen=4040 last_return=-238.2 (+1 eps) [worker 1] episodes_seen=4070 last_return=-105.0 (+1 eps) [worker 2] episodes_seen=4070 last_return=-169.3 (+1 eps) [worker 0] episodes_seen=4050 last_return=-121.2 (+1 eps) [worker 1] episodes_seen=4080 last_return=-98.8 (+1 eps) [worker 2] episodes_seen=4080 last_return=-49.9 (+1 eps) [worker 0] episodes_seen=4060 last_return=-140.6 (+1 eps) [worker 2] episodes_seen=4090 last_return=-278.8 (+1 eps) [worker 1] episodes_seen=4090 last_return=-155.3 (+1 eps) [worker 0] episodes_seen=4070 last_return=-93.4 (+1 eps) [worker 2] episodes_seen=4100 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=4100 last_return=-64.9 (+1 eps) [worker 0] episodes_seen=4080 last_return=-92.7 (+1 eps) [worker 2] episodes_seen=4110 last_return=-84.4 (+1 eps) [worker 0] episodes_seen=4090 last_return=-90.6 (+1 eps) [worker 1] episodes_seen=4110 last_return=-114.5 (+1 eps) [worker 2] episodes_seen=4120 last_return=-77.0 (+1 eps) [worker 0] episodes_seen=4100 last_return=-71.3 (+1 eps) [worker 1] episodes_seen=4120 last_return=-113.5 (+1 eps) [worker 2] episodes_seen=4130 last_return=-76.0 (+1 eps) [worker 0] episodes_seen=4110 last_return=-87.1 (+1 eps) [worker 1] episodes_seen=4130 last_return=-69.8 (+1 eps) [worker 2] episodes_seen=4140 last_return=-91.7 (+1 eps) [worker 0] episodes_seen=4120 last_return=-63.3 (+1 eps) [worker 1] episodes_seen=4140 last_return=-78.9 (+1 eps) [worker 2] episodes_seen=4150 last_return=-73.4 (+1 eps) [worker 1] episodes_seen=4150 last_return=-16.7 (+1 eps) [worker 0] episodes_seen=4130 last_return=-99.3 (+1 eps) [worker 2] episodes_seen=4160 last_return=-86.4 (+1 eps) [worker 1] episodes_seen=4160 last_return=-115.2 (+1 eps) [worker 0] episodes_seen=4140 last_return=-115.7 (+1 eps) [worker 2] 
episodes_seen=4170 last_return=-34.3 (+1 eps) [worker 1] episodes_seen=4170 last_return=-100.5 (+1 eps) [worker 0] episodes_seen=4150 last_return=-82.3 (+1 eps) [worker 2] episodes_seen=4180 last_return=-89.3 (+1 eps) [worker 0] episodes_seen=4160 last_return=-69.8 (+1 eps) [worker 1] episodes_seen=4180 last_return=-81.0 (+1 eps) [worker 2] episodes_seen=4190 last_return=-85.0 (+1 eps) [worker 0] episodes_seen=4170 last_return=-61.4 (+1 eps) [worker 1] episodes_seen=4190 last_return=-105.6 (+1 eps) [worker 2] episodes_seen=4200 last_return=-121.8 (+1 eps) [worker 0] episodes_seen=4180 last_return=-102.7 (+1 eps) [worker 1] episodes_seen=4200 last_return=-155.1 (+1 eps) [worker 2] episodes_seen=4210 last_return=-94.6 (+1 eps) [worker 0] episodes_seen=4190 last_return=-60.6 (+1 eps) [worker 1] episodes_seen=4210 last_return=-100.3 (+1 eps) [worker 2] episodes_seen=4220 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=4200 last_return=-49.4 (+1 eps) [worker 1] episodes_seen=4220 last_return=-141.2 (+1 eps) [worker 0] episodes_seen=4210 last_return=-105.4 (+1 eps) [worker 2] episodes_seen=4230 last_return=-137.1 (+1 eps) [worker 1] episodes_seen=4230 last_return=-238.7 (+1 eps) [worker 0] episodes_seen=4220 last_return=-188.8 (+1 eps) [worker 1] episodes_seen=4240 last_return=-105.3 (+1 eps) [worker 2] episodes_seen=4240 last_return=-100.7 (+1 eps) [worker 0] episodes_seen=4230 last_return=-89.1 (+1 eps) [worker 1] episodes_seen=4250 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=4250 last_return=-105.6 (+1 eps) [worker 0] episodes_seen=4240 last_return=-90.5 (+1 eps) [worker 1] episodes_seen=4260 last_return=-100.9 (+1 eps) [worker 2] episodes_seen=4260 last_return=15.4 (+1 eps) [worker 1] episodes_seen=4270 last_return=-96.5 (+1 eps) [worker 0] episodes_seen=4250 last_return=-198.7 (+1 eps) [worker 2] episodes_seen=4270 last_return=-137.5 (+1 eps) [worker 1] episodes_seen=4280 last_return=-64.3 (+1 eps) [worker 0] episodes_seen=4260 last_return=-80.6 (+1 
eps) [worker 2] episodes_seen=4280 last_return=-252.4 (+1 eps) [A2C][sync] it=19207 steps= 1440525 (+ 75) avg10=-156.66 loss=4684.373 pg=0.279 vf=7806.828 H=0.224 gn=26738.922 [worker 1] episodes_seen=4290 last_return=-100.0 (+1 eps) [worker 0] episodes_seen=4270 last_return=-93.8 (+1 eps) [worker 2] episodes_seen=4290 last_return=-89.0 (+1 eps) [worker 1] episodes_seen=4300 last_return=-80.0 (+1 eps) [worker 0] episodes_seen=4280 last_return=-67.6 (+1 eps) [worker 2] episodes_seen=4300 last_return=-121.6 (+1 eps) [worker 1] episodes_seen=4310 last_return=-80.8 (+1 eps) [worker 2] episodes_seen=4310 last_return=-63.4 (+1 eps) [worker 0] episodes_seen=4290 last_return=-68.1 (+1 eps) [worker 1] episodes_seen=4320 last_return=-61.2 (+1 eps) [worker 2] episodes_seen=4320 last_return=21.8 (+1 eps) [worker 0] episodes_seen=4300 last_return=-102.2 (+1 eps) [worker 1] episodes_seen=4330 last_return=-85.2 (+1 eps) [worker 2] episodes_seen=4330 last_return=-86.9 (+1 eps) [worker 0] episodes_seen=4310 last_return=-87.3 (+1 eps) [worker 1] episodes_seen=4340 last_return=-106.5 (+1 eps) [worker 2] episodes_seen=4340 last_return=-85.7 (+1 eps) [worker 0] episodes_seen=4320 last_return=-81.2 (+1 eps) [worker 1] episodes_seen=4350 last_return=-94.1 (+1 eps) [worker 2] episodes_seen=4350 last_return=-93.9 (+1 eps) [worker 0] episodes_seen=4330 last_return=-116.8 (+1 eps) [worker 1] episodes_seen=4360 last_return=-55.8 (+1 eps) [worker 2] episodes_seen=4360 last_return=-108.6 (+1 eps) [worker 0] episodes_seen=4340 last_return=-100.9 (+1 eps) [worker 1] episodes_seen=4370 last_return=-97.2 (+1 eps) [worker 2] episodes_seen=4370 last_return=-65.0 (+1 eps) [worker 1] episodes_seen=4380 last_return=-152.3 (+1 eps) [worker 0] episodes_seen=4350 last_return=-97.4 (+1 eps) [worker 2] episodes_seen=4380 last_return=-77.5 (+1 eps) [worker 1] episodes_seen=4390 last_return=-66.7 (+1 eps) [worker 0] episodes_seen=4360 last_return=-77.6 (+1 eps) [worker 2] episodes_seen=4390 last_return=-71.1 
(+1 eps) [worker 1] episodes_seen=4400 last_return=-102.8 (+1 eps) [worker 0] episodes_seen=4370 last_return=-209.4 (+1 eps) [worker 2] episodes_seen=4400 last_return=-79.9 (+1 eps) [worker 1] episodes_seen=4410 last_return=-119.9 (+1 eps) [worker 0] episodes_seen=4380 last_return=-93.1 (+1 eps) [worker 1] episodes_seen=4420 last_return=-77.6 (+1 eps) [worker 2] episodes_seen=4410 last_return=-62.1 (+1 eps) [worker 0] episodes_seen=4390 last_return=-122.3 (+1 eps) [worker 1] episodes_seen=4430 last_return=-131.2 (+1 eps) [worker 2] episodes_seen=4420 last_return=-112.8 (+1 eps) [worker 1] episodes_seen=4440 last_return=79.0 (+1 eps) [worker 0] episodes_seen=4400 last_return=-110.8 (+1 eps) [worker 2] episodes_seen=4430 last_return=-75.5 (+1 eps) [worker 0] episodes_seen=4410 last_return=-91.8 (+1 eps) [worker 1] episodes_seen=4450 last_return=-78.9 (+1 eps) [worker 2] episodes_seen=4440 last_return=-109.9 (+1 eps) [worker 1] episodes_seen=4460 last_return=-76.0 (+1 eps) [worker 0] episodes_seen=4420 last_return=-300.1 (+1 eps) [worker 2] episodes_seen=4450 last_return=-89.8 (+1 eps) [worker 1] episodes_seen=4470 last_return=-184.1 (+1 eps) [worker 2] episodes_seen=4460 last_return=-186.9 (+1 eps) [worker 0] episodes_seen=4430 last_return=-130.7 (+1 eps) [worker 2] episodes_seen=4470 last_return=-226.3 (+1 eps) [worker 0] episodes_seen=4440 last_return=-84.4 (+1 eps) [worker 1] episodes_seen=4480 last_return=100.0 (+1 eps) [worker 2] episodes_seen=4480 last_return=-132.0 (+1 eps) [worker 0] episodes_seen=4450 last_return=-79.0 (+1 eps) [worker 1] episodes_seen=4490 last_return=93.8 (+1 eps) [worker 2] episodes_seen=4490 last_return=-100.6 (+1 eps) [A2C][sync] it=20274 steps= 1520550 (+ 75) avg10=-173.45 loss=488.095 pg=0.002 vf=813.499 H=0.685 gn=1687.364 [worker 0] episodes_seen=4460 last_return=-130.9 (+1 eps) [worker 1] episodes_seen=4500 last_return=-103.5 (+1 eps) [worker 2] episodes_seen=4500 last_return=-131.0 (+1 eps) [worker 0] episodes_seen=4470 
last_return=-89.8 (+1 eps) [worker 1] episodes_seen=4510 last_return=20.6 (+1 eps) [worker 0] episodes_seen=4480 last_return=-79.6 (+1 eps) [worker 2] episodes_seen=4510 last_return=-96.7 (+1 eps) [worker 1] episodes_seen=4520 last_return=-80.6 (+1 eps) [worker 0] episodes_seen=4490 last_return=-13.4 (+1 eps) [worker 2] episodes_seen=4520 last_return=-132.9 (+1 eps) [worker 1] episodes_seen=4530 last_return=-96.0 (+1 eps) [worker 0] episodes_seen=4500 last_return=-106.0 (+1 eps) [worker 2] episodes_seen=4530 last_return=-120.1 (+1 eps) [worker 1] episodes_seen=4540 last_return=-119.8 (+1 eps) [worker 0] episodes_seen=4510 last_return=-326.6 (+1 eps) [worker 2] episodes_seen=4540 last_return=-99.7 (+1 eps) [worker 1] episodes_seen=4550 last_return=-32.1 (+1 eps) [worker 0] episodes_seen=4520 last_return=-83.4 (+1 eps) [worker 2] episodes_seen=4550 last_return=-93.4 (+1 eps) [worker 1] episodes_seen=4560 last_return=-39.3 (+1 eps) [worker 1] episodes_seen=4570 last_return=-91.4 (+1 eps) [worker 2] episodes_seen=4560 last_return=-205.2 (+1 eps) [worker 1] episodes_seen=4580 last_return=-94.2 (+1 eps) [worker 0] episodes_seen=4530 last_return=-94.2 (+1 eps) [worker 2] episodes_seen=4570 last_return=-154.1 (+1 eps) [worker 1] episodes_seen=4590 last_return=-136.8 (+1 eps) [worker 0] episodes_seen=4540 last_return=-40.1 (+1 eps) [worker 1] episodes_seen=4600 last_return=-77.6 (+1 eps) [worker 2] episodes_seen=4580 last_return=-125.4 (+1 eps) [worker 0] episodes_seen=4550 last_return=-153.5 (+1 eps) [worker 1] episodes_seen=4610 last_return=-121.2 (+1 eps) [worker 2] episodes_seen=4590 last_return=-114.0 (+1 eps) [worker 0] episodes_seen=4560 last_return=-114.4 (+1 eps) [worker 1] episodes_seen=4620 last_return=-101.4 (+1 eps) [worker 2] episodes_seen=4600 last_return=-108.6 (+1 eps) [worker 0] episodes_seen=4570 last_return=-217.7 (+1 eps) [worker 1] episodes_seen=4630 last_return=-123.1 (+1 eps) [worker 2] episodes_seen=4610 last_return=-78.2 (+1 eps) [worker 0] 
episodes_seen=4580 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=4640 last_return=-85.5 (+1 eps) [worker 2] episodes_seen=4620 last_return=-122.7 (+1 eps) [worker 0] episodes_seen=4590 last_return=-100.0 (+1 eps) [worker 1] episodes_seen=4650 last_return=-100.0 (+1 eps) [worker 2] episodes_seen=4630 last_return=-65.6 (+1 eps) [worker 0] episodes_seen=4600 last_return=-107.8 (+1 eps) [worker 1] episodes_seen=4660 last_return=-92.0 (+1 eps) [worker 2] episodes_seen=4640 last_return=-103.3 (+1 eps) [worker 0] episodes_seen=4610 last_return=-109.9 (+1 eps) [worker 1] episodes_seen=4670 last_return=-110.1 (+1 eps) [worker 2] episodes_seen=4650 last_return=-140.2 (+1 eps) [worker 0] episodes_seen=4620 last_return=-154.2 (+1 eps) [worker 1] episodes_seen=4680 last_return=-109.8 (+1 eps) [worker 2] episodes_seen=4660 last_return=-142.8 (+1 eps) [worker 0] episodes_seen=4630 last_return=-111.1 (+1 eps) [worker 1] episodes_seen=4690 last_return=-194.5 (+1 eps) [worker 0] episodes_seen=4640 last_return=-101.2 (+1 eps) [worker 1] episodes_seen=4700 last_return=-67.3 (+1 eps) [worker 2] episodes_seen=4670 last_return=-1.0 (+1 eps) [worker 0] episodes_seen=4650 last_return=-75.3 (+1 eps) [worker 1] episodes_seen=4710 last_return=-89.6 (+1 eps) [worker 2] episodes_seen=4680 last_return=-93.1 (+1 eps) [worker 0] episodes_seen=4660 last_return=-56.6 (+1 eps) [A2C][sync] it=21334 steps= 1600050 (+ 75) avg10= -82.69 loss=792.476 pg=-0.234 vf=1321.192 H=0.513 gn=2706.458 [Checkpoint] Saved self-describing checkpoint → part2_artifacts/checkpoints/a2c_run20_seed1227.pth [A2C][sync] done: steps=1600050 time=1571.7s avg10=-82.69
[Run run20_seed1227] checkpoint: part2_artifacts/checkpoints/a2c_run20_seed1227.pth [Run run20_seed1227] training plot (tail 500): part2_artifacts/train_curve_run20_seed1227.png [Run run20_seed1227] training plot (full): part2_artifacts/train_curve_full_run20_seed1227.png [Run run20_seed1227] per-worker plot: part2_artifacts/train_curve_workers_run20_seed1227.png [Run run20_seed1227] workers-average plot: part2_artifacts/train_curve_workers_avg_run20_seed1227.png
[Eval run20_seed1227] mean=-123.90 std=25.57 min=-176.01 max=-81.00 [Eval run20_seed1227] CSV: part2_artifacts/eval10_run20_seed1227.csv [Eval run20_seed1227] plot: part2_artifacts/eval10_run20_seed1227.png [Best] ep=0 return=-81.00 seed=1227
/usr/local/lib/python3.12/dist-packages/gymnasium/wrappers/rendering.py:293: UserWarning: WARN: Overwriting existing videos at /content/part2_artifacts/videos/run20_seed1227 folder (try specifying a different `video_folder` for the `RecordVideo` wrapper if this is not desired)
logger.warn(
[Video run20_seed1227] episode return=-81.00 [Video run20_seed1227] saved under: part2_artifacts/videos run20_seed1227 | mean=-123.9±25.6 | best_ep=0, best_ret=-81.0